tree-vect-stmts.c (vectorizable_shift): Add missed test on vect_induction_def.
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "rtl.h"
30 #include "ssa.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "target.h"
35 #include "gimple-pretty-print.h"
36 #include "internal-fn.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-cfg.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "cfgloop.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-scalar-evolution.h"
46 #include "flags.h"
47 #include "insn-config.h"
48 #include "expmed.h"
49 #include "dojump.h"
50 #include "explow.h"
51 #include "calls.h"
52 #include "emit-rtl.h"
53 #include "varasm.h"
54 #include "stmt.h"
55 #include "expr.h"
56 #include "recog.h" /* FIXME: for insn_data */
57 #include "insn-codes.h"
58 #include "optabs.h"
59 #include "diagnostic-core.h"
60 #include "tree-vectorizer.h"
61 #include "cgraph.h"
62 #include "builtins.h"
63
64 /* For lang_hooks.types.type_for_mode. */
65 #include "langhooks.h"
66
67 /* Return the vectorized type for the given statement. */
68
69 tree
70 stmt_vectype (struct _stmt_vec_info *stmt_info)
71 {
72 return STMT_VINFO_VECTYPE (stmt_info);
73 }
74
75 /* Return TRUE iff the given statement is in an inner loop relative to
76 the loop being vectorized. */
77 bool
78 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
79 {
80 gimple stmt = STMT_VINFO_STMT (stmt_info);
81 basic_block bb = gimple_bb (stmt);
82 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
83 struct loop* loop;
84
85 if (!loop_vinfo)
86 return false;
87
88 loop = LOOP_VINFO_LOOP (loop_vinfo);
89
90 return (bb->loop_father == loop->inner);
91 }
92
93 /* Record the cost of a statement, either by directly informing the
94 target model or by saving it in a vector for later processing.
95 Return a preliminary estimate of the statement's cost. */
96
97 unsigned
98 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
99 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
100 int misalign, enum vect_cost_model_location where)
101 {
102 if (body_cost_vec)
103 {
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 add_stmt_info_to_vec (body_cost_vec, count, kind,
106 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
107 misalign);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110
111 }
112 else
113 {
114 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
115 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
116 void *target_cost_data;
117
118 if (loop_vinfo)
119 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
120 else
121 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
122
123 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
124 misalign, where);
125 }
126 }
127
128 /* Return a variable of type ELEM_TYPE[NELEMS]. */
129
130 static tree
131 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
132 {
133 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
134 "vect_array");
135 }
136
137 /* ARRAY is an array of vectors created by create_vector_array.
138 Return an SSA_NAME for the vector in index N. The reference
139 is part of the vectorization of STMT and the vector is associated
140 with scalar destination SCALAR_DEST. */
141
142 static tree
143 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
144 tree array, unsigned HOST_WIDE_INT n)
145 {
146 tree vect_type, vect, vect_name, array_ref;
147 gimple new_stmt;
148
149 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
150 vect_type = TREE_TYPE (TREE_TYPE (array));
151 vect = vect_create_destination_var (scalar_dest, vect_type);
152 array_ref = build4 (ARRAY_REF, vect_type, array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
155
156 new_stmt = gimple_build_assign (vect, array_ref);
157 vect_name = make_ssa_name (vect, new_stmt);
158 gimple_assign_set_lhs (new_stmt, vect_name);
159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160
161 return vect_name;
162 }
163
164 /* ARRAY is an array of vectors created by create_vector_array.
165 Emit code to store SSA_NAME VECT in index N of the array.
166 The store is part of the vectorization of STMT. */
167
168 static void
169 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
170 tree array, unsigned HOST_WIDE_INT n)
171 {
172 tree array_ref;
173 gimple new_stmt;
174
175 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
176 build_int_cst (size_type_node, n),
177 NULL_TREE, NULL_TREE);
178
179 new_stmt = gimple_build_assign (array_ref, vect);
180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
181 }
182
183 /* PTR is a pointer to an array of type TYPE. Return a representation
184 of *PTR. The memory reference replaces those in FIRST_DR
185 (and its group). */
186
187 static tree
188 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
189 {
190 tree mem_ref, alias_ptr_type;
191
192 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
193 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
194 /* Arrays have the same alignment as their type. */
195 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
196 return mem_ref;
197 }
198
199 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
200
201 /* Function vect_mark_relevant.
202
203 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
204
205 static void
206 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
207 enum vect_relevant relevant, bool live_p,
208 bool used_in_pattern)
209 {
210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
211 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
212 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
213 gimple pattern_stmt;
214
215 if (dump_enabled_p ())
216 dump_printf_loc (MSG_NOTE, vect_location,
217 "mark relevant %d, live %d.\n", relevant, live_p);
218
219 /* If this stmt is an original stmt in a pattern, we might need to mark its
220 related pattern stmt instead of the original stmt. However, such stmts
221 may have their own uses that are not in any pattern; in such cases the
222 stmt itself should be marked. */
223 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
224 {
225 bool found = false;
226 if (!used_in_pattern)
227 {
228 imm_use_iterator imm_iter;
229 use_operand_p use_p;
230 gimple use_stmt;
231 tree lhs;
232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
233 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
234
235 if (is_gimple_assign (stmt))
236 lhs = gimple_assign_lhs (stmt);
237 else
238 lhs = gimple_call_lhs (stmt);
239
240 /* This use is outside the pattern.  If LHS has other uses that are
241 pattern uses, we should mark the stmt itself, and not the pattern
242 stmt. */
243 if (lhs && TREE_CODE (lhs) == SSA_NAME)
244 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
245 {
246 if (is_gimple_debug (USE_STMT (use_p)))
247 continue;
248 use_stmt = USE_STMT (use_p);
249
250 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
251 continue;
252
253 if (vinfo_for_stmt (use_stmt)
254 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
255 {
256 found = true;
257 break;
258 }
259 }
260 }
261
262 if (!found)
263 {
264 /* This is the last stmt in a sequence that was detected as a
265 pattern that can potentially be vectorized. Don't mark the stmt
266 as relevant/live because it's not going to be vectorized.
267 Instead mark the pattern-stmt that replaces it. */
268
269 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
270
271 if (dump_enabled_p ())
272 dump_printf_loc (MSG_NOTE, vect_location,
273 "last stmt in pattern. don't mark"
274 " relevant/live.\n");
275 stmt_info = vinfo_for_stmt (pattern_stmt);
276 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
277 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
278 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
279 stmt = pattern_stmt;
280 }
281 }
282
283 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
284 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
285 STMT_VINFO_RELEVANT (stmt_info) = relevant;
286
287 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
288 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
289 {
290 if (dump_enabled_p ())
291 dump_printf_loc (MSG_NOTE, vect_location,
292 "already marked relevant/live.\n");
293 return;
294 }
295
296 worklist->safe_push (stmt);
297 }
298
299
300 /* Function vect_stmt_relevant_p.
301
302 Return true if STMT in loop that is represented by LOOP_VINFO is
303 "relevant for vectorization".
304
305 A stmt is considered "relevant for vectorization" if:
306 - it has uses outside the loop.
307 - it has vdefs (it alters memory).
308 - it is a control stmt in the loop (other than the exit condition).
309
310 CHECKME: what other side effects would the vectorizer allow? */
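
/* A hypothetical example (loop and names are illustrative only, not taken
   from these sources):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     <-- relevant: the stmt has a vdef (stores to memory)
         t = b[i] * 2;
       }
     ... = t;                 <-- makes the def of t "live": used outside the loop

   The store is marked vect_used_in_scope because it alters memory, while the
   stmt computing t is only marked live, since its value is needed after the
   loop.  */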
311
312 static bool
313 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
314 enum vect_relevant *relevant, bool *live_p)
315 {
316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
317 ssa_op_iter op_iter;
318 imm_use_iterator imm_iter;
319 use_operand_p use_p;
320 def_operand_p def_p;
321
322 *relevant = vect_unused_in_scope;
323 *live_p = false;
324
325 /* cond stmt other than loop exit cond. */
326 if (is_ctrl_stmt (stmt)
327 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
328 != loop_exit_ctrl_vec_info_type)
329 *relevant = vect_used_in_scope;
330
331 /* changing memory. */
332 if (gimple_code (stmt) != GIMPLE_PHI)
333 if (gimple_vdef (stmt)
334 && !gimple_clobber_p (stmt))
335 {
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: stmt has vdefs.\n");
339 *relevant = vect_used_in_scope;
340 }
341
342 /* uses outside the loop. */
343 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
344 {
345 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
346 {
347 basic_block bb = gimple_bb (USE_STMT (use_p));
348 if (!flow_bb_inside_loop_p (loop, bb))
349 {
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: used out of loop.\n");
353
354 if (is_gimple_debug (USE_STMT (use_p)))
355 continue;
356
357 /* We expect all such uses to be in the loop exit phis
358 (because of loop-closed SSA form).  */
359 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
360 gcc_assert (bb == single_exit (loop)->dest);
361
362 *live_p = true;
363 }
364 }
365 }
366
367 return (*live_p || *relevant);
368 }
369
370
371 /* Function exist_non_indexing_operands_for_use_p
372
373 USE is one of the uses attached to STMT. Check if USE is
374 used in STMT for anything other than indexing an array. */
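
/* For instance (illustrative only): for the store  a[i_1] = x_2;  the use of
   x_2 is a real operand of the store to be vectorized, so this returns true
   for it, whereas i_1 only takes part in the address computation and this
   returns false for it.  */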
375
376 static bool
377 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
378 {
379 tree operand;
380 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
381
382 /* USE corresponds to some operand in STMT. If there is no data
383 reference in STMT, then any operand that corresponds to USE
384 is not indexing an array. */
385 if (!STMT_VINFO_DATA_REF (stmt_info))
386 return true;
387
388 /* STMT has a data_ref. FORNOW this means that it is of one of
389 the following forms:
390 -1- ARRAY_REF = var
391 -2- var = ARRAY_REF
392 (This should have been verified in analyze_data_refs).
393
394 'var' in the second case corresponds to a def, not a use,
395 so USE cannot correspond to any operands that are not used
396 for array indexing.
397
398 Therefore, all we need to check is if STMT falls into the
399 first case, and whether var corresponds to USE. */
400
401 if (!gimple_assign_copy_p (stmt))
402 {
403 if (is_gimple_call (stmt)
404 && gimple_call_internal_p (stmt))
405 switch (gimple_call_internal_fn (stmt))
406 {
407 case IFN_MASK_STORE:
408 operand = gimple_call_arg (stmt, 3);
409 if (operand == use)
410 return true;
411 /* FALLTHRU */
412 case IFN_MASK_LOAD:
413 operand = gimple_call_arg (stmt, 2);
414 if (operand == use)
415 return true;
416 break;
417 default:
418 break;
419 }
420 return false;
421 }
422
423 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
424 return false;
425 operand = gimple_assign_rhs1 (stmt);
426 if (TREE_CODE (operand) != SSA_NAME)
427 return false;
428
429 if (operand == use)
430 return true;
431
432 return false;
433 }
434
435
436 /*
437 Function process_use.
438
439 Inputs:
440 - a USE in STMT in a loop represented by LOOP_VINFO
441 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
442 that defined USE. This is done by calling mark_relevant and passing it
443 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
444 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
445 be performed.
446
447 Outputs:
448 Generally, LIVE_P and RELEVANT are used to define the liveness and
449 relevance info of the DEF_STMT of this USE:
450 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
451 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
452 Exceptions:
453 - case 1: If USE is used only for address computations (e.g. array indexing),
454 which does not need to be directly vectorized, then the liveness/relevance
455 of the respective DEF_STMT is left unchanged.
456 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
457 skip DEF_STMT because it has already been processed.
458 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
459 be modified accordingly.
460
461 Return true if everything is as expected. Return false otherwise. */
462
463 static bool
464 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
465 enum vect_relevant relevant, vec<gimple> *worklist,
466 bool force)
467 {
468 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
469 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
470 stmt_vec_info dstmt_vinfo;
471 basic_block bb, def_bb;
472 tree def;
473 gimple def_stmt;
474 enum vect_def_type dt;
475
476 /* case 1: we are only interested in uses that need to be vectorized. Uses
477 that are used for address computation are not considered relevant. */
478 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
479 return true;
480
481 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
482 {
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
485 "not vectorized: unsupported use in stmt.\n");
486 return false;
487 }
488
489 if (!def_stmt || gimple_nop_p (def_stmt))
490 return true;
491
492 def_bb = gimple_bb (def_stmt);
493 if (!flow_bb_inside_loop_p (loop, def_bb))
494 {
495 if (dump_enabled_p ())
496 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
497 return true;
498 }
499
500 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
501 DEF_STMT must have already been processed, because this should be the
502 only way that STMT, which is a reduction-phi, was put in the worklist,
503 as there should be no other uses for DEF_STMT in the loop. So we just
504 check that everything is as expected, and we are done. */
505 dstmt_vinfo = vinfo_for_stmt (def_stmt);
506 bb = gimple_bb (stmt);
507 if (gimple_code (stmt) == GIMPLE_PHI
508 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
509 && gimple_code (def_stmt) != GIMPLE_PHI
510 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
511 && bb->loop_father == def_bb->loop_father)
512 {
513 if (dump_enabled_p ())
514 dump_printf_loc (MSG_NOTE, vect_location,
515 "reduc-stmt defining reduc-phi in the same nest.\n");
516 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
517 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
518 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
519 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
520 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
521 return true;
522 }
523
524 /* case 3a: outer-loop stmt defining an inner-loop stmt:
525 outer-loop-header-bb:
526 d = def_stmt
527 inner-loop:
528 stmt # use (d)
529 outer-loop-tail-bb:
530 ... */
531 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
532 {
533 if (dump_enabled_p ())
534 dump_printf_loc (MSG_NOTE, vect_location,
535 "outer-loop def-stmt defining inner-loop stmt.\n");
536
537 switch (relevant)
538 {
539 case vect_unused_in_scope:
540 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
541 vect_used_in_scope : vect_unused_in_scope;
542 break;
543
544 case vect_used_in_outer_by_reduction:
545 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
546 relevant = vect_used_by_reduction;
547 break;
548
549 case vect_used_in_outer:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_in_scope;
552 break;
553
554 case vect_used_in_scope:
555 break;
556
557 default:
558 gcc_unreachable ();
559 }
560 }
561
562 /* case 3b: inner-loop stmt defining an outer-loop stmt:
563 outer-loop-header-bb:
564 ...
565 inner-loop:
566 d = def_stmt
567 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
568 stmt # use (d) */
569 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
570 {
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE, vect_location,
573 "inner-loop def-stmt defining outer-loop stmt.\n");
574
575 switch (relevant)
576 {
577 case vect_unused_in_scope:
578 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
579 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
580 vect_used_in_outer_by_reduction : vect_unused_in_scope;
581 break;
582
583 case vect_used_by_reduction:
584 relevant = vect_used_in_outer_by_reduction;
585 break;
586
587 case vect_used_in_scope:
588 relevant = vect_used_in_outer;
589 break;
590
591 default:
592 gcc_unreachable ();
593 }
594 }
595
596 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
597 is_pattern_stmt_p (stmt_vinfo));
598 return true;
599 }
600
601
602 /* Function vect_mark_stmts_to_be_vectorized.
603
604 Not all stmts in the loop need to be vectorized. For example:
605
606 for i...
607 for j...
608 1. T0 = i + j
609 2. T1 = a[T0]
610
611 3. j = j + 1
612
613 Stmts 1 and 3 do not need to be vectorized, because loop control and
614 addressing of vectorized data-refs are handled differently.
615
616 This pass detects such stmts. */
617
618 bool
619 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
620 {
621 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
622 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
623 unsigned int nbbs = loop->num_nodes;
624 gimple_stmt_iterator si;
625 gimple stmt;
626 unsigned int i;
627 stmt_vec_info stmt_vinfo;
628 basic_block bb;
629 gimple phi;
630 bool live_p;
631 enum vect_relevant relevant, tmp_relevant;
632 enum vect_def_type def_type;
633
634 if (dump_enabled_p ())
635 dump_printf_loc (MSG_NOTE, vect_location,
636 "=== vect_mark_stmts_to_be_vectorized ===\n");
637
638 auto_vec<gimple, 64> worklist;
639
640 /* 1. Init worklist. */
641 for (i = 0; i < nbbs; i++)
642 {
643 bb = bbs[i];
644 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
645 {
646 phi = gsi_stmt (si);
647 if (dump_enabled_p ())
648 {
649 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
650 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
651 }
652
653 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
654 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
655 }
656 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
657 {
658 stmt = gsi_stmt (si);
659 if (dump_enabled_p ())
660 {
661 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
662 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
663 }
664
665 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
666 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
667 }
668 }
669
670 /* 2. Process_worklist */
671 while (worklist.length () > 0)
672 {
673 use_operand_p use_p;
674 ssa_op_iter iter;
675
676 stmt = worklist.pop ();
677 if (dump_enabled_p ())
678 {
679 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
680 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
681 }
682
683 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
684 (DEF_STMT) as relevant/irrelevant and live/dead according to the
685 liveness and relevance properties of STMT. */
686 stmt_vinfo = vinfo_for_stmt (stmt);
687 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
688 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
689
690 /* Generally, the liveness and relevance properties of STMT are
691 propagated as is to the DEF_STMTs of its USEs:
692 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
693 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
694
695 One exception is when STMT has been identified as defining a reduction
696 variable; in this case we set the liveness/relevance as follows:
697 live_p = false
698 relevant = vect_used_by_reduction
699 This is because we distinguish between two kinds of relevant stmts -
700 those that are used by a reduction computation, and those that are
701 (also) used by a regular computation. This allows us later on to
702 identify stmts that are used solely by a reduction, and therefore the
703 order of the results that they produce does not have to be kept. */
704
705 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
706 tmp_relevant = relevant;
707 switch (def_type)
708 {
709 case vect_reduction_def:
710 switch (tmp_relevant)
711 {
712 case vect_unused_in_scope:
713 relevant = vect_used_by_reduction;
714 break;
715
716 case vect_used_by_reduction:
717 if (gimple_code (stmt) == GIMPLE_PHI)
718 break;
719 /* fall through */
720
721 default:
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724 "unsupported use of reduction.\n");
725 return false;
726 }
727
728 live_p = false;
729 break;
730
731 case vect_nested_cycle:
732 if (tmp_relevant != vect_unused_in_scope
733 && tmp_relevant != vect_used_in_outer_by_reduction
734 && tmp_relevant != vect_used_in_outer)
735 {
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738 "unsupported use of nested cycle.\n");
739
740 return false;
741 }
742
743 live_p = false;
744 break;
745
746 case vect_double_reduction_def:
747 if (tmp_relevant != vect_unused_in_scope
748 && tmp_relevant != vect_used_by_reduction)
749 {
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "unsupported use of double reduction.\n");
753
754 return false;
755 }
756
757 live_p = false;
758 break;
759
760 default:
761 break;
762 }
763
764 if (is_pattern_stmt_p (stmt_vinfo))
765 {
766 /* Pattern statements are not inserted into the code, so
767 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
768 have to scan the RHS or function arguments instead. */
769 if (is_gimple_assign (stmt))
770 {
771 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
772 tree op = gimple_assign_rhs1 (stmt);
773
774 i = 1;
775 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
776 {
777 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
778 live_p, relevant, &worklist, false)
779 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
780 live_p, relevant, &worklist, false))
781 return false;
782 i = 2;
783 }
784 for (; i < gimple_num_ops (stmt); i++)
785 {
786 op = gimple_op (stmt, i);
787 if (TREE_CODE (op) == SSA_NAME
788 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
789 &worklist, false))
790 return false;
791 }
792 }
793 else if (is_gimple_call (stmt))
794 {
795 for (i = 0; i < gimple_call_num_args (stmt); i++)
796 {
797 tree arg = gimple_call_arg (stmt, i);
798 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
799 &worklist, false))
800 return false;
801 }
802 }
803 }
804 else
805 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
806 {
807 tree op = USE_FROM_PTR (use_p);
808 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
809 &worklist, false))
810 return false;
811 }
812
813 if (STMT_VINFO_GATHER_P (stmt_vinfo))
814 {
815 tree off;
816 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
817 gcc_assert (decl);
818 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
819 &worklist, true))
820 return false;
821 }
822 } /* while worklist */
823
824 return true;
825 }
826
827
828 /* Function vect_model_simple_cost.
829
830 Models cost for simple operations, i.e. those that only emit ncopies of a
831 single op. Right now, this does not account for multiple insns that could
832 be generated for the single vector op. We will handle that shortly. */
833
834 void
835 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
836 enum vect_def_type *dt,
837 stmt_vector_for_cost *prologue_cost_vec,
838 stmt_vector_for_cost *body_cost_vec)
839 {
840 int i;
841 int inside_cost = 0, prologue_cost = 0;
842
843 /* The SLP costs were already calculated during SLP tree build. */
844 if (PURE_SLP_STMT (stmt_info))
845 return;
846
847 /* FORNOW: Assuming maximum 2 args per stmt. */
848 for (i = 0; i < 2; i++)
849 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
850 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
851 stmt_info, 0, vect_prologue);
852
853 /* Pass the inside-of-loop statements to the target-specific cost model. */
854 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
855 stmt_info, 0, vect_body);
856
857 if (dump_enabled_p ())
858 dump_printf_loc (MSG_NOTE, vect_location,
859 "vect_model_simple_cost: inside_cost = %d, "
860 "prologue_cost = %d .\n", inside_cost, prologue_cost);
861 }
862
863
864 /* Model cost for type demotion and promotion operations. PWR is normally
865 zero for single-step promotions and demotions. It will be one if
866 two-step promotion/demotion is required, and so on. Each additional
867 step doubles the number of instructions required. */
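
/* Worked example (illustrative only): for a two-step promotion (PWR == 1)
   the loop below accounts for vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote stmts, while a two-step demotion accounts for
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3.  */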
868
869 static void
870 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
871 enum vect_def_type *dt, int pwr)
872 {
873 int i, tmp;
874 int inside_cost = 0, prologue_cost = 0;
875 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
876 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
877 void *target_cost_data;
878
879 /* The SLP costs were already calculated during SLP tree build. */
880 if (PURE_SLP_STMT (stmt_info))
881 return;
882
883 if (loop_vinfo)
884 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
885 else
886 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
887
888 for (i = 0; i < pwr + 1; i++)
889 {
890 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
891 (i + 1) : i;
892 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
893 vec_promote_demote, stmt_info, 0,
894 vect_body);
895 }
896
897 /* FORNOW: Assuming maximum 2 args per stmt. */
898 for (i = 0; i < 2; i++)
899 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
900 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
901 stmt_info, 0, vect_prologue);
902
903 if (dump_enabled_p ())
904 dump_printf_loc (MSG_NOTE, vect_location,
905 "vect_model_promotion_demotion_cost: inside_cost = %d, "
906 "prologue_cost = %d .\n", inside_cost, prologue_cost);
907 }
908
909 /* Function vect_cost_group_size
910
911 For grouped load or store, return the group_size only if it is the first
912 load or store of a group, else return 1. This ensures that group size is
913 only returned once per group. */
914
915 static int
916 vect_cost_group_size (stmt_vec_info stmt_info)
917 {
918 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
919
920 if (first_stmt == STMT_VINFO_STMT (stmt_info))
921 return GROUP_SIZE (stmt_info);
922
923 return 1;
924 }
925
926
927 /* Function vect_model_store_cost
928
929 Models cost for stores. In the case of grouped accesses, one access
930 has the overhead of the grouped access attributed to it. */
931
932 void
933 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
934 bool store_lanes_p, enum vect_def_type dt,
935 slp_tree slp_node,
936 stmt_vector_for_cost *prologue_cost_vec,
937 stmt_vector_for_cost *body_cost_vec)
938 {
939 int group_size;
940 unsigned int inside_cost = 0, prologue_cost = 0;
941 struct data_reference *first_dr;
942 gimple first_stmt;
943
944 if (dt == vect_constant_def || dt == vect_external_def)
945 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
946 stmt_info, 0, vect_prologue);
947
948 /* Grouped access? */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
950 {
951 if (slp_node)
952 {
953 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
954 group_size = 1;
955 }
956 else
957 {
958 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
959 group_size = vect_cost_group_size (stmt_info);
960 }
961
962 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
963 }
964 /* Not a grouped access. */
965 else
966 {
967 group_size = 1;
968 first_dr = STMT_VINFO_DATA_REF (stmt_info);
969 }
970
971 /* We assume that the cost of a single store-lanes instruction is
972 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
973 access is instead being provided by a permute-and-store operation,
974 include the cost of the permutes. */
975 if (!store_lanes_p && group_size > 1
976 && !STMT_VINFO_STRIDED_P (stmt_info))
977 {
978 /* Uses high and low interleave or shuffle operations for each
979 needed permute. */
980 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
981 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
982 stmt_info, 0, vect_body);
983
984 if (dump_enabled_p ())
985 dump_printf_loc (MSG_NOTE, vect_location,
986 "vect_model_store_cost: strided group_size = %d .\n",
987 group_size);
988 }
989
990 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
991 /* Costs of the stores. */
992 if (STMT_VINFO_STRIDED_P (stmt_info)
993 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
994 {
995 /* N scalar stores plus extracting the elements. */
996 inside_cost += record_stmt_cost (body_cost_vec,
997 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
998 scalar_store, stmt_info, 0, vect_body);
999 }
1000 else
1001 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1002
1003 if (STMT_VINFO_STRIDED_P (stmt_info))
1004 inside_cost += record_stmt_cost (body_cost_vec,
1005 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1006 vec_to_scalar, stmt_info, 0, vect_body);
1007
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE, vect_location,
1010 "vect_model_store_cost: inside_cost = %d, "
1011 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1012 }
1013
1014
1015 /* Calculate cost of DR's memory access. */
1016 void
1017 vect_get_store_cost (struct data_reference *dr, int ncopies,
1018 unsigned int *inside_cost,
1019 stmt_vector_for_cost *body_cost_vec)
1020 {
1021 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1022 gimple stmt = DR_STMT (dr);
1023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1024
1025 switch (alignment_support_scheme)
1026 {
1027 case dr_aligned:
1028 {
1029 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1030 vector_store, stmt_info, 0,
1031 vect_body);
1032
1033 if (dump_enabled_p ())
1034 dump_printf_loc (MSG_NOTE, vect_location,
1035 "vect_model_store_cost: aligned.\n");
1036 break;
1037 }
1038
1039 case dr_unaligned_supported:
1040 {
1041 /* Here, we assign an additional cost for the unaligned store. */
1042 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1043 unaligned_store, stmt_info,
1044 DR_MISALIGNMENT (dr), vect_body);
1045 if (dump_enabled_p ())
1046 dump_printf_loc (MSG_NOTE, vect_location,
1047 "vect_model_store_cost: unaligned supported by "
1048 "hardware.\n");
1049 break;
1050 }
1051
1052 case dr_unaligned_unsupported:
1053 {
1054 *inside_cost = VECT_MAX_COST;
1055
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1058 "vect_model_store_cost: unsupported access.\n");
1059 break;
1060 }
1061
1062 default:
1063 gcc_unreachable ();
1064 }
1065 }
1066
1067
1068 /* Function vect_model_load_cost
1069
1070 Models cost for loads. In the case of grouped accesses, the last access
1071 has the overhead of the grouped access attributed to it. Since unaligned
1072 accesses are supported for loads, we also account for the costs of the
1073 access scheme chosen. */
1074
1075 void
1076 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1077 bool load_lanes_p, slp_tree slp_node,
1078 stmt_vector_for_cost *prologue_cost_vec,
1079 stmt_vector_for_cost *body_cost_vec)
1080 {
1081 int group_size;
1082 gimple first_stmt;
1083 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1084 unsigned int inside_cost = 0, prologue_cost = 0;
1085
1086 /* Grouped accesses? */
1087 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1088 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1089 {
1090 group_size = vect_cost_group_size (stmt_info);
1091 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1092 }
1093 /* Not a grouped access. */
1094 else
1095 {
1096 group_size = 1;
1097 first_dr = dr;
1098 }
1099
1100 /* We assume that the cost of a single load-lanes instruction is
1101 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1102 access is instead being provided by a load-and-permute operation,
1103 include the cost of the permutes. */
1104 if (!load_lanes_p && group_size > 1
1105 && !STMT_VINFO_STRIDED_P (stmt_info))
1106 {
1107 /* Uses even and odd extract operations or shuffle operations
1108 for each needed permute. */
1109 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1110 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1111 stmt_info, 0, vect_body);
1112
1113 if (dump_enabled_p ())
1114 dump_printf_loc (MSG_NOTE, vect_location,
1115 "vect_model_load_cost: strided group_size = %d .\n",
1116 group_size);
1117 }
1118
1119 /* The loads themselves. */
1120 if (STMT_VINFO_STRIDED_P (stmt_info)
1121 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1122 {
1123 /* N scalar loads plus gathering them into a vector. */
1124 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1125 inside_cost += record_stmt_cost (body_cost_vec,
1126 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1127 scalar_load, stmt_info, 0, vect_body);
1128 }
1129 else
1130 vect_get_load_cost (first_dr, ncopies,
1131 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1132 || group_size > 1 || slp_node),
1133 &inside_cost, &prologue_cost,
1134 prologue_cost_vec, body_cost_vec, true);
1135 if (STMT_VINFO_STRIDED_P (stmt_info))
1136 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1137 stmt_info, 0, vect_body);
1138
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location,
1141 "vect_model_load_cost: inside_cost = %d, "
1142 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1143 }
1144
1145
1146 /* Calculate cost of DR's memory access. */
1147 void
1148 vect_get_load_cost (struct data_reference *dr, int ncopies,
1149 bool add_realign_cost, unsigned int *inside_cost,
1150 unsigned int *prologue_cost,
1151 stmt_vector_for_cost *prologue_cost_vec,
1152 stmt_vector_for_cost *body_cost_vec,
1153 bool record_prologue_costs)
1154 {
1155 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1156 gimple stmt = DR_STMT (dr);
1157 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1158
1159 switch (alignment_support_scheme)
1160 {
1161 case dr_aligned:
1162 {
1163 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1164 stmt_info, 0, vect_body);
1165
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: aligned.\n");
1169
1170 break;
1171 }
1172 case dr_unaligned_supported:
1173 {
1174 /* Here, we assign an additional cost for the unaligned load. */
1175 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1176 unaligned_load, stmt_info,
1177 DR_MISALIGNMENT (dr), vect_body);
1178
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE, vect_location,
1181 "vect_model_load_cost: unaligned supported by "
1182 "hardware.\n");
1183
1184 break;
1185 }
1186 case dr_explicit_realign:
1187 {
1188 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1189 vector_load, stmt_info, 0, vect_body);
1190 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1191 vec_perm, stmt_info, 0, vect_body);
1192
1193 /* FIXME: If the misalignment remains fixed across the iterations of
1194 the containing loop, the following cost should be added to the
1195 prologue costs. */
1196 if (targetm.vectorize.builtin_mask_for_load)
1197 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1198 stmt_info, 0, vect_body);
1199
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: explicit realign\n");
1203
1204 break;
1205 }
1206 case dr_explicit_realign_optimized:
1207 {
1208 if (dump_enabled_p ())
1209 dump_printf_loc (MSG_NOTE, vect_location,
1210 "vect_model_load_cost: unaligned software "
1211 "pipelined.\n");
1212
1213 /* Unaligned software pipeline has a load of an address, an initial
1214 load, and possibly a mask operation to "prime" the loop. However,
1215 if this is an access in a group of loads, which provide grouped
1216 access, then the above cost should only be considered for one
1217 access in the group. Inside the loop, there is a load op
1218 and a realignment op. */
1219
1220 if (add_realign_cost && record_prologue_costs)
1221 {
1222 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1223 vector_stmt, stmt_info,
1224 0, vect_prologue);
1225 if (targetm.vectorize.builtin_mask_for_load)
1226 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1227 vector_stmt, stmt_info,
1228 0, vect_prologue);
1229 }
1230
1231 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1232 stmt_info, 0, vect_body);
1233 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1234 stmt_info, 0, vect_body);
1235
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "vect_model_load_cost: explicit realign optimized"
1239 "\n");
1240
1241 break;
1242 }
1243
1244 case dr_unaligned_unsupported:
1245 {
1246 *inside_cost = VECT_MAX_COST;
1247
1248 if (dump_enabled_p ())
1249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1250 "vect_model_load_cost: unsupported access.\n");
1251 break;
1252 }
1253
1254 default:
1255 gcc_unreachable ();
1256 }
1257 }
1258
1259 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1260 the loop preheader for the vectorized stmt STMT. */
1261
1262 static void
1263 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1264 {
1265 if (gsi)
1266 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1267 else
1268 {
1269 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1270 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1271
1272 if (loop_vinfo)
1273 {
1274 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1275 basic_block new_bb;
1276 edge pe;
1277
1278 if (nested_in_vect_loop_p (loop, stmt))
1279 loop = loop->inner;
1280
1281 pe = loop_preheader_edge (loop);
1282 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1283 gcc_assert (!new_bb);
1284 }
1285 else
1286 {
1287 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1288 basic_block bb;
1289 gimple_stmt_iterator gsi_bb_start;
1290
1291 gcc_assert (bb_vinfo);
1292 bb = BB_VINFO_BB (bb_vinfo);
1293 gsi_bb_start = gsi_after_labels (bb);
1294 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1295 }
1296 }
1297
1298 if (dump_enabled_p ())
1299 {
1300 dump_printf_loc (MSG_NOTE, vect_location,
1301 "created new init_stmt: ");
1302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1303 }
1304 }
1305
1306 /* Function vect_init_vector.
1307
1308 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1309 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1310 vector type a vector with all elements equal to VAL is created first.
1311 Place the initialization at BSI if it is not NULL. Otherwise, place the
1312 initialization at the loop preheader.
1313 Return the DEF of INIT_STMT.
1314 It will be used in the vectorization of STMT. */
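
/* A sketch (variable names are hypothetical): for VAL = 3, a four-element
   integer vector TYPE and GSI == NULL, this emits roughly

     cst_1 = { 3, 3, 3, 3 };

   on the loop preheader edge and returns the SSA name holding that
   vector.  */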
1315
1316 tree
1317 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1318 {
1319 tree new_var;
1320 gimple init_stmt;
1321 tree vec_oprnd;
1322 tree new_temp;
1323
1324 if (TREE_CODE (type) == VECTOR_TYPE
1325 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1326 {
1327 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1328 {
1329 if (CONSTANT_CLASS_P (val))
1330 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1331 else
1332 {
1333 new_temp = make_ssa_name (TREE_TYPE (type));
1334 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1335 vect_init_vector_1 (stmt, init_stmt, gsi);
1336 val = new_temp;
1337 }
1338 }
1339 val = build_vector_from_val (type, val);
1340 }
1341
1342 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1343 init_stmt = gimple_build_assign (new_var, val);
1344 new_temp = make_ssa_name (new_var, init_stmt);
1345 gimple_assign_set_lhs (init_stmt, new_temp);
1346 vect_init_vector_1 (stmt, init_stmt, gsi);
1347 vec_oprnd = gimple_assign_lhs (init_stmt);
1348 return vec_oprnd;
1349 }
1350
1351
1352 /* Function vect_get_vec_def_for_operand.
1353
1354 OP is an operand in STMT. This function returns a (vector) def that will be
1355 used in the vectorized stmt for STMT.
1356
1357 In the case that OP is an SSA_NAME which is defined in the loop, then
1358 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1359
1360 In case OP is an invariant or constant, a new stmt that creates a vector def
1361 needs to be introduced. */
1362
1363 tree
1364 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1365 {
1366 tree vec_oprnd;
1367 gimple vec_stmt;
1368 gimple def_stmt;
1369 stmt_vec_info def_stmt_info = NULL;
1370 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1371 unsigned int nunits;
1372 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1373 tree def;
1374 enum vect_def_type dt;
1375 bool is_simple_use;
1376 tree vector_type;
1377
1378 if (dump_enabled_p ())
1379 {
1380 dump_printf_loc (MSG_NOTE, vect_location,
1381 "vect_get_vec_def_for_operand: ");
1382 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1383 dump_printf (MSG_NOTE, "\n");
1384 }
1385
1386 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1387 &def_stmt, &def, &dt);
1388 gcc_assert (is_simple_use);
1389 if (dump_enabled_p ())
1390 {
1391 int loc_printed = 0;
1392 if (def)
1393 {
1394 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1395 loc_printed = 1;
1396 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1397 dump_printf (MSG_NOTE, "\n");
1398 }
1399 if (def_stmt)
1400 {
1401 if (loc_printed)
1402 dump_printf (MSG_NOTE, " def_stmt = ");
1403 else
1404 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1405 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1406 }
1407 }
1408
1409 switch (dt)
1410 {
1411 /* Case 1: operand is a constant. */
1412 case vect_constant_def:
1413 {
1414 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1415 gcc_assert (vector_type);
1416 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1417
1418 if (scalar_def)
1419 *scalar_def = op;
1420
1421 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1422 if (dump_enabled_p ())
1423 dump_printf_loc (MSG_NOTE, vect_location,
1424 "Create vector_cst. nunits = %d\n", nunits);
1425
1426 return vect_init_vector (stmt, op, vector_type, NULL);
1427 }
1428
1429 /* Case 2: operand is defined outside the loop - loop invariant. */
1430 case vect_external_def:
1431 {
1432 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1433 gcc_assert (vector_type);
1434
1435 if (scalar_def)
1436 *scalar_def = def;
1437
1438 /* Create 'vec_inv = {inv,inv,..,inv}' */
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1441
1442 return vect_init_vector (stmt, def, vector_type, NULL);
1443 }
1444
1445 /* Case 3: operand is defined inside the loop. */
1446 case vect_internal_def:
1447 {
1448 if (scalar_def)
1449 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1450
1451 /* Get the def from the vectorized stmt. */
1452 def_stmt_info = vinfo_for_stmt (def_stmt);
1453
1454 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1455 /* Get vectorized pattern statement. */
1456 if (!vec_stmt
1457 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1458 && !STMT_VINFO_RELEVANT (def_stmt_info))
1459 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1460 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1461 gcc_assert (vec_stmt);
1462 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1463 vec_oprnd = PHI_RESULT (vec_stmt);
1464 else if (is_gimple_call (vec_stmt))
1465 vec_oprnd = gimple_call_lhs (vec_stmt);
1466 else
1467 vec_oprnd = gimple_assign_lhs (vec_stmt);
1468 return vec_oprnd;
1469 }
1470
1471 /* Case 4: operand is defined by a loop header phi - reduction */
1472 case vect_reduction_def:
1473 case vect_double_reduction_def:
1474 case vect_nested_cycle:
1475 {
1476 struct loop *loop;
1477
1478 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1479 loop = (gimple_bb (def_stmt))->loop_father;
1480
1481 /* Get the def before the loop */
1482 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1483 return get_initial_def_for_reduction (stmt, op, scalar_def);
1484 }
1485
1486 /* Case 5: operand is defined by loop-header phi - induction. */
1487 case vect_induction_def:
1488 {
1489 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1490
1491 /* Get the def from the vectorized stmt. */
1492 def_stmt_info = vinfo_for_stmt (def_stmt);
1493 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1494 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1495 vec_oprnd = PHI_RESULT (vec_stmt);
1496 else
1497 vec_oprnd = gimple_get_lhs (vec_stmt);
1498 return vec_oprnd;
1499 }
1500
1501 default:
1502 gcc_unreachable ();
1503 }
1504 }
1505
1506
1507 /* Function vect_get_vec_def_for_stmt_copy
1508
1509 Return a vector-def for an operand. This function is used when the
1510 vectorized stmt to be created (by the caller to this function) is a "copy"
1511 created in case the vectorized result cannot fit in one vector, and several
1512 copies of the vector-stmt are required. In this case the vector-def is
1513 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1514 of the stmt that defines VEC_OPRND.
1515 DT is the type of the vector def VEC_OPRND.
1516
1517 Context:
1518 In case the vectorization factor (VF) is bigger than the number
1519 of elements that can fit in a vectype (nunits), we have to generate
1520 more than one vector stmt to vectorize the scalar stmt. This situation
1521 arises when there are multiple data-types operated upon in the loop; the
1522 smallest data-type determines the VF, and as a result, when vectorizing
1523 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1524 vector stmt (each computing a vector of 'nunits' results, and together
1525 computing 'VF' results in each iteration). This function is called when
1526 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1527 which VF=16 and nunits=4, so the number of copies required is 4):
1528
1529 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1530
1531 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1532 VS1.1: vx.1 = memref1 VS1.2
1533 VS1.2: vx.2 = memref2 VS1.3
1534 VS1.3: vx.3 = memref3
1535
1536 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1537 VSnew.1: vz1 = vx.1 + ... VSnew.2
1538 VSnew.2: vz2 = vx.2 + ... VSnew.3
1539 VSnew.3: vz3 = vx.3 + ...
1540
1541 The vectorization of S1 is explained in vectorizable_load.
1542 The vectorization of S2:
1543 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1544 the function 'vect_get_vec_def_for_operand' is called to
1545 get the relevant vector-def for each operand of S2. For operand x it
1546 returns the vector-def 'vx.0'.
1547
1548 To create the remaining copies of the vector-stmt (VSnew.j), this
1549 function is called to get the relevant vector-def for each operand. It is
1550 obtained from the respective VS1.j stmt, which is recorded in the
1551 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1552
1553 For example, to obtain the vector-def 'vx.1' in order to create the
1554 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1555 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1556 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1557 and return its def ('vx.1').
1558 Overall, to create the above sequence this function will be called 3 times:
1559 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1560 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1561 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1562
1563 tree
1564 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1565 {
1566 gimple vec_stmt_for_operand;
1567 stmt_vec_info def_stmt_info;
1568
1569 /* Do nothing; can reuse same def. */
1570 if (dt == vect_external_def || dt == vect_constant_def)
1571 return vec_oprnd;
1572
1573 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1574 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1575 gcc_assert (def_stmt_info);
1576 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1577 gcc_assert (vec_stmt_for_operand);
1578 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1579 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1580 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1581 else
1582 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1583 return vec_oprnd;
1584 }
1585
1586
1587 /* Get vectorized definitions for the operands to create a copy of an original
1588 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1589
1590 static void
1591 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1592 vec<tree> *vec_oprnds0,
1593 vec<tree> *vec_oprnds1)
1594 {
1595 tree vec_oprnd = vec_oprnds0->pop ();
1596
1597 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1598 vec_oprnds0->quick_push (vec_oprnd);
1599
1600 if (vec_oprnds1 && vec_oprnds1->length ())
1601 {
1602 vec_oprnd = vec_oprnds1->pop ();
1603 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1604 vec_oprnds1->quick_push (vec_oprnd);
1605 }
1606 }
1607
1608
1609 /* Get vectorized definitions for OP0 and OP1.
1610 REDUC_INDEX is the index of reduction operand in case of reduction,
1611 and -1 otherwise. */
1612
1613 void
1614 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1615 vec<tree> *vec_oprnds0,
1616 vec<tree> *vec_oprnds1,
1617 slp_tree slp_node, int reduc_index)
1618 {
1619 if (slp_node)
1620 {
1621 int nops = (op1 == NULL_TREE) ? 1 : 2;
1622 auto_vec<tree> ops (nops);
1623 auto_vec<vec<tree> > vec_defs (nops);
1624
1625 ops.quick_push (op0);
1626 if (op1)
1627 ops.quick_push (op1);
1628
1629 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1630
1631 *vec_oprnds0 = vec_defs[0];
1632 if (op1)
1633 *vec_oprnds1 = vec_defs[1];
1634 }
1635 else
1636 {
1637 tree vec_oprnd;
1638
1639 vec_oprnds0->create (1);
1640 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1641 vec_oprnds0->quick_push (vec_oprnd);
1642
1643 if (op1)
1644 {
1645 vec_oprnds1->create (1);
1646 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1647 vec_oprnds1->quick_push (vec_oprnd);
1648 }
1649 }
1650 }
1651
1652
1653 /* Function vect_finish_stmt_generation.
1654
1655 Insert a new stmt. */
1656
1657 void
1658 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1659 gimple_stmt_iterator *gsi)
1660 {
1661 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1662 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1663 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1664
1665 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1666
1667 if (!gsi_end_p (*gsi)
1668 && gimple_has_mem_ops (vec_stmt))
1669 {
1670 gimple at_stmt = gsi_stmt (*gsi);
1671 tree vuse = gimple_vuse (at_stmt);
1672 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1673 {
1674 tree vdef = gimple_vdef (at_stmt);
1675 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1676 /* If we have an SSA vuse and insert a store, update virtual
1677 SSA form to avoid triggering the renamer. Do so only
1678 if we can easily see all uses - which is what almost always
1679 happens with the way vectorized stmts are inserted. */
1680 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1681 && ((is_gimple_assign (vec_stmt)
1682 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1683 || (is_gimple_call (vec_stmt)
1684 && !(gimple_call_flags (vec_stmt)
1685 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1686 {
1687 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1688 gimple_set_vdef (vec_stmt, new_vdef);
1689 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1690 }
1691 }
1692 }
1693 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1694
1695 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1696 bb_vinfo));
1697
1698 if (dump_enabled_p ())
1699 {
1700 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1701 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1702 }
1703
1704 gimple_set_location (vec_stmt, gimple_location (stmt));
1705
1706 /* While EH edges will generally prevent vectorization, stmt might
1707 e.g. be in a must-not-throw region. Ensure newly created stmts
1708 that could throw are part of the same region. */
1709 int lp_nr = lookup_stmt_eh_lp (stmt);
1710 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1711 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1712 }
1713
1714 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1715 a function declaration if the target has a vectorized version
1716 of the function, or NULL_TREE if the function cannot be vectorized. */
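
/* As a hypothetical example: for a call such as

     y_1 = __builtin_sqrtf (x_2);

   with vectype_out == vectype_in being a four-element float vector type, the
   target hook used below may return the decl of a target-specific vector
   sqrt builtin (or NULL_TREE if none exists), and the call can then be
   vectorized as a single call to that builtin.  */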
1717
1718 tree
1719 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1720 {
1721 tree fndecl = gimple_call_fndecl (call);
1722
1723 /* We only handle functions that do not read or clobber memory -- i.e.
1724 const or novops ones. */
1725 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1726 return NULL_TREE;
1727
1728 if (!fndecl
1729 || TREE_CODE (fndecl) != FUNCTION_DECL
1730 || !DECL_BUILT_IN (fndecl))
1731 return NULL_TREE;
1732
1733 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1734 vectype_in);
1735 }
1736
1737
1738 static tree permute_vec_elements (tree, tree, tree, gimple,
1739 gimple_stmt_iterator *);
1740
1741
1742 /* Function vectorizable_mask_load_store.
1743
1744 Check if STMT performs a conditional load or store that can be vectorized.
1745 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1746 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1747 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1748
1749 static bool
1750 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1751 gimple *vec_stmt, slp_tree slp_node)
1752 {
1753 tree vec_dest = NULL;
1754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1755 stmt_vec_info prev_stmt_info;
1756 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1757 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1758 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1759 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1760 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1761 tree elem_type;
1762 gimple new_stmt;
1763 tree dummy;
1764 tree dataref_ptr = NULL_TREE;
1765 gimple ptr_incr;
1766 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1767 int ncopies;
1768 int i, j;
1769 bool inv_p;
1770 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1771 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1772 int gather_scale = 1;
1773 enum vect_def_type gather_dt = vect_unknown_def_type;
1774 bool is_store;
1775 tree mask;
1776 gimple def_stmt;
1777 tree def;
1778 enum vect_def_type dt;
1779
1780 if (slp_node != NULL)
1781 return false;
1782
1783 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1784 gcc_assert (ncopies >= 1);
1785
1786 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1787 mask = gimple_call_arg (stmt, 2);
1788 if (TYPE_PRECISION (TREE_TYPE (mask))
1789 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1790 return false;
1791
1792 /* FORNOW. This restriction should be relaxed. */
1793 if (nested_in_vect_loop && ncopies > 1)
1794 {
1795 if (dump_enabled_p ())
1796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1797 "multiple types in nested loop.");
1798 return false;
1799 }
1800
1801 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1802 return false;
1803
1804 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1805 return false;
1806
1807 if (!STMT_VINFO_DATA_REF (stmt_info))
1808 return false;
1809
1810 elem_type = TREE_TYPE (vectype);
1811
1812 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1813 return false;
1814
1815 if (STMT_VINFO_STRIDED_P (stmt_info))
1816 return false;
1817
1818 if (STMT_VINFO_GATHER_P (stmt_info))
1819 {
1820 gimple def_stmt;
1821 tree def;
1822 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1823 &gather_off, &gather_scale);
1824 gcc_assert (gather_decl);
1825 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1826 &def_stmt, &def, &gather_dt,
1827 &gather_off_vectype))
1828 {
1829 if (dump_enabled_p ())
1830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1831 "gather index use not simple.");
1832 return false;
1833 }
1834
1835 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1836 tree masktype
1837 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1838 if (TREE_CODE (masktype) == INTEGER_TYPE)
1839 {
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1842 "masked gather with integer mask not supported.");
1843 return false;
1844 }
1845 }
1846 else if (tree_int_cst_compare (nested_in_vect_loop
1847 ? STMT_VINFO_DR_STEP (stmt_info)
1848 : DR_STEP (dr), size_zero_node) <= 0)
1849 return false;
1850 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1851 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1852 return false;
1853
1854 if (TREE_CODE (mask) != SSA_NAME)
1855 return false;
1856
1857 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1858 &def_stmt, &def, &dt))
1859 return false;
1860
1861 if (is_store)
1862 {
1863 tree rhs = gimple_call_arg (stmt, 3);
1864 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1865 &def_stmt, &def, &dt))
1866 return false;
1867 }
1868
1869 if (!vec_stmt) /* transformation not required. */
1870 {
1871 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1872 if (is_store)
1873 vect_model_store_cost (stmt_info, ncopies, false, dt,
1874 NULL, NULL, NULL);
1875 else
1876 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1877 return true;
1878 }
1879
1880 /** Transform. **/
1881
1882 if (STMT_VINFO_GATHER_P (stmt_info))
1883 {
1884 tree vec_oprnd0 = NULL_TREE, op;
1885 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1886 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1887 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1888 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1889 tree mask_perm_mask = NULL_TREE;
1890 edge pe = loop_preheader_edge (loop);
1891 gimple_seq seq;
1892 basic_block new_bb;
1893 enum { NARROW, NONE, WIDEN } modifier;
1894 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1895
1896 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1897 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1898 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1899 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1900 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1901 scaletype = TREE_VALUE (arglist);
1902 gcc_checking_assert (types_compatible_p (srctype, rettype)
1903 && types_compatible_p (srctype, masktype));
1904
1905 if (nunits == gather_off_nunits)
1906 modifier = NONE;
1907 else if (nunits == gather_off_nunits / 2)
1908 {
1909 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1910 modifier = WIDEN;
1911
1912 for (i = 0; i < gather_off_nunits; ++i)
1913 sel[i] = i | nunits;
1914
1915 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1916 }
1917 else if (nunits == gather_off_nunits * 2)
1918 {
1919 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1920 modifier = NARROW;
1921
1922 for (i = 0; i < nunits; ++i)
1923 sel[i] = i < gather_off_nunits
1924 ? i : i + nunits - gather_off_nunits;
1925
1926 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1927 ncopies *= 2;
1928 for (i = 0; i < nunits; ++i)
1929 sel[i] = i | gather_off_nunits;
1930 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1931 }
1932 else
1933 gcc_unreachable ();
1934
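      /* Editor's note (illustrative values, assumed rather than taken from
	 this file): with V4DF data (nunits == 4) gathered through V8SI
	 offsets (gather_off_nunits == 8) we get modifier == WIDEN, and the
	 selector computed above is { 4, 5, 6, 7, 4, 5, 6, 7 }, so every odd
	 vectorized copy reuses the high half of the offset vector via
	 permute_vec_elements.  */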
1935 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1936
1937 ptr = fold_convert (ptrtype, gather_base);
1938 if (!is_gimple_min_invariant (ptr))
1939 {
1940 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1941 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1942 gcc_assert (!new_bb);
1943 }
1944
1945 scale = build_int_cst (scaletype, gather_scale);
1946
1947 prev_stmt_info = NULL;
1948 for (j = 0; j < ncopies; ++j)
1949 {
1950 if (modifier == WIDEN && (j & 1))
1951 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1952 perm_mask, stmt, gsi);
1953 else if (j == 0)
1954 op = vec_oprnd0
1955 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1956 else
1957 op = vec_oprnd0
1958 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1959
1960 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1961 {
1962 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1963 == TYPE_VECTOR_SUBPARTS (idxtype));
1964 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1965 var = make_ssa_name (var);
1966 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1967 new_stmt
1968 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1969 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1970 op = var;
1971 }
1972
1973 if (mask_perm_mask && (j & 1))
1974 mask_op = permute_vec_elements (mask_op, mask_op,
1975 mask_perm_mask, stmt, gsi);
1976 else
1977 {
1978 if (j == 0)
1979 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1980 else
1981 {
1982 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1983 &def_stmt, &def, &dt);
1984 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1985 }
1986
1987 mask_op = vec_mask;
1988 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1989 {
1990 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1991 == TYPE_VECTOR_SUBPARTS (masktype));
1992 var = vect_get_new_vect_var (masktype, vect_simple_var,
1993 NULL);
1994 var = make_ssa_name (var);
1995 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1996 new_stmt
1997 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1999 mask_op = var;
2000 }
2001 }
2002
2003 new_stmt
2004 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2005 scale);
2006
2007 if (!useless_type_conversion_p (vectype, rettype))
2008 {
2009 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2010 == TYPE_VECTOR_SUBPARTS (rettype));
2011 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2012 op = make_ssa_name (var, new_stmt);
2013 gimple_call_set_lhs (new_stmt, op);
2014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015 var = make_ssa_name (vec_dest);
2016 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2017 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2018 }
2019 else
2020 {
2021 var = make_ssa_name (vec_dest, new_stmt);
2022 gimple_call_set_lhs (new_stmt, var);
2023 }
2024
2025 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2026
2027 if (modifier == NARROW)
2028 {
2029 if ((j & 1) == 0)
2030 {
2031 prev_res = var;
2032 continue;
2033 }
2034 var = permute_vec_elements (prev_res, var,
2035 perm_mask, stmt, gsi);
2036 new_stmt = SSA_NAME_DEF_STMT (var);
2037 }
2038
2039 if (prev_stmt_info == NULL)
2040 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2041 else
2042 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2043 prev_stmt_info = vinfo_for_stmt (new_stmt);
2044 }
2045
2046 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2047 from the IL. */
2048 tree lhs = gimple_call_lhs (stmt);
2049 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2050 set_vinfo_for_stmt (new_stmt, stmt_info);
2051 set_vinfo_for_stmt (stmt, NULL);
2052 STMT_VINFO_STMT (stmt_info) = new_stmt;
2053 gsi_replace (gsi, new_stmt, true);
2054 return true;
2055 }
2056 else if (is_store)
2057 {
2058 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2059 prev_stmt_info = NULL;
2060 for (i = 0; i < ncopies; i++)
2061 {
2062 unsigned align, misalign;
2063
2064 if (i == 0)
2065 {
2066 tree rhs = gimple_call_arg (stmt, 3);
2067 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2068 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2069 	      /* We should have caught mismatched types earlier.  */
2070 gcc_assert (useless_type_conversion_p (vectype,
2071 TREE_TYPE (vec_rhs)));
2072 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2073 NULL_TREE, &dummy, gsi,
2074 &ptr_incr, false, &inv_p);
2075 gcc_assert (!inv_p);
2076 }
2077 else
2078 {
2079 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2080 &def, &dt);
2081 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2082 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2083 &def, &dt);
2084 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2085 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2086 TYPE_SIZE_UNIT (vectype));
2087 }
2088
2089 align = TYPE_ALIGN_UNIT (vectype);
2090 if (aligned_access_p (dr))
2091 misalign = 0;
2092 else if (DR_MISALIGNMENT (dr) == -1)
2093 {
2094 align = TYPE_ALIGN_UNIT (elem_type);
2095 misalign = 0;
2096 }
2097 else
2098 misalign = DR_MISALIGNMENT (dr);
2099 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2100 misalign);
2101 new_stmt
2102 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2103 gimple_call_arg (stmt, 1),
2104 vec_mask, vec_rhs);
2105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2106 if (i == 0)
2107 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2108 else
2109 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2110 prev_stmt_info = vinfo_for_stmt (new_stmt);
2111 }
2112 }
2113 else
2114 {
2115 tree vec_mask = NULL_TREE;
2116 prev_stmt_info = NULL;
2117 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2118 for (i = 0; i < ncopies; i++)
2119 {
2120 unsigned align, misalign;
2121
2122 if (i == 0)
2123 {
2124 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2125 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2126 NULL_TREE, &dummy, gsi,
2127 &ptr_incr, false, &inv_p);
2128 gcc_assert (!inv_p);
2129 }
2130 else
2131 {
2132 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2133 &def, &dt);
2134 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2135 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2136 TYPE_SIZE_UNIT (vectype));
2137 }
2138
2139 align = TYPE_ALIGN_UNIT (vectype);
2140 if (aligned_access_p (dr))
2141 misalign = 0;
2142 else if (DR_MISALIGNMENT (dr) == -1)
2143 {
2144 align = TYPE_ALIGN_UNIT (elem_type);
2145 misalign = 0;
2146 }
2147 else
2148 misalign = DR_MISALIGNMENT (dr);
2149 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2150 misalign);
2151 new_stmt
2152 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2153 gimple_call_arg (stmt, 1),
2154 vec_mask);
2155 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2157 if (i == 0)
2158 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2159 else
2160 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2161 prev_stmt_info = vinfo_for_stmt (new_stmt);
2162 }
2163 }
2164
2165 if (!is_store)
2166 {
2167 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2168 from the IL. */
2169 tree lhs = gimple_call_lhs (stmt);
2170 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2171 set_vinfo_for_stmt (new_stmt, stmt_info);
2172 set_vinfo_for_stmt (stmt, NULL);
2173 STMT_VINFO_STMT (stmt_info) = new_stmt;
2174 gsi_replace (gsi, new_stmt, true);
2175 }
2176
2177 return true;
2178 }
2179
2180
2181 /* Function vectorizable_call.
2182
2183 Check if GS performs a function call that can be vectorized.
2184    If VEC_STMT is also passed, vectorize GS: create a vectorized
2185    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2186 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2187
2188 static bool
2189 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2190 slp_tree slp_node)
2191 {
2192 gcall *stmt;
2193 tree vec_dest;
2194 tree scalar_dest;
2195 tree op, type;
2196 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2197 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2198 tree vectype_out, vectype_in;
2199 int nunits_in;
2200 int nunits_out;
2201 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2202 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2203 tree fndecl, new_temp, def, rhs_type;
2204 gimple def_stmt;
2205 enum vect_def_type dt[3]
2206 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2207 gimple new_stmt = NULL;
2208 int ncopies, j;
2209 vec<tree> vargs = vNULL;
2210 enum { NARROW, NONE, WIDEN } modifier;
2211 size_t i, nargs;
2212 tree lhs;
2213
2214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2215 return false;
2216
2217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2218 return false;
2219
2220 /* Is GS a vectorizable call? */
2221 stmt = dyn_cast <gcall *> (gs);
2222 if (!stmt)
2223 return false;
2224
2225 if (gimple_call_internal_p (stmt)
2226 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2227 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2228 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2229 slp_node);
2230
2231 if (gimple_call_lhs (stmt) == NULL_TREE
2232 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2233 return false;
2234
2235 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2236
2237 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2238
2239 /* Process function arguments. */
2240 rhs_type = NULL_TREE;
2241 vectype_in = NULL_TREE;
2242 nargs = gimple_call_num_args (stmt);
2243
2244   /* Bail out if the function has more than three arguments; we do not have
2245      interesting builtin functions to vectorize with more than two arguments
2246      except for fma.  Calls with no arguments are not handled either.  */
2247 if (nargs == 0 || nargs > 3)
2248 return false;
2249
2250   /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
2251 if (gimple_call_internal_p (stmt)
2252 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2253 {
2254 nargs = 0;
2255 rhs_type = unsigned_type_node;
2256 }
2257
2258 for (i = 0; i < nargs; i++)
2259 {
2260 tree opvectype;
2261
2262 op = gimple_call_arg (stmt, i);
2263
2264 /* We can only handle calls with arguments of the same type. */
2265 if (rhs_type
2266 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2267 {
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2270 "argument types differ.\n");
2271 return false;
2272 }
2273 if (!rhs_type)
2274 rhs_type = TREE_TYPE (op);
2275
2276 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2277 &def_stmt, &def, &dt[i], &opvectype))
2278 {
2279 if (dump_enabled_p ())
2280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2281 "use not simple.\n");
2282 return false;
2283 }
2284
2285 if (!vectype_in)
2286 vectype_in = opvectype;
2287 else if (opvectype
2288 && opvectype != vectype_in)
2289 {
2290 if (dump_enabled_p ())
2291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2292 "argument vector types differ.\n");
2293 return false;
2294 }
2295 }
2296   /* If all arguments are external or constant defs, use a vector type with
2297      the same size as the output vector type.  */
2298 if (!vectype_in)
2299 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2300 if (vec_stmt)
2301 gcc_assert (vectype_in);
2302 if (!vectype_in)
2303 {
2304 if (dump_enabled_p ())
2305 {
2306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2307 "no vectype for scalar type ");
2308 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2309 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2310 }
2311
2312 return false;
2313 }
2314
2315 /* FORNOW */
2316 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2317 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2318 if (nunits_in == nunits_out / 2)
2319 modifier = NARROW;
2320 else if (nunits_out == nunits_in)
2321 modifier = NONE;
2322 else if (nunits_out == nunits_in / 2)
2323 modifier = WIDEN;
2324 else
2325 return false;
2326
2327 /* For now, we only vectorize functions if a target specific builtin
2328 is available. TODO -- in some cases, it might be profitable to
2329 insert the calls for pieces of the vector, in order to be able
2330 to vectorize other operations in the loop. */
2331 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2332 if (fndecl == NULL_TREE)
2333 {
2334 if (gimple_call_internal_p (stmt)
2335 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2336 && !slp_node
2337 && loop_vinfo
2338 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2339 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2340 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2341 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2342 {
2343 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2344 { 0, 1, 2, ... vf - 1 } vector. */
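	  /* For instance (editor's illustration with assumed values): with
	     vf == 8 and V4SI output vectors, the NONE-modifier loop below
	     emits two copies,

	       copy j == 0:  { 0, 1, 2, 3 }
	       copy j == 1:  { 4, 5, 6, 7 }

	     matching the j * nunits_out + k constants built further down.  */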
2345 gcc_assert (nargs == 0);
2346 }
2347 else
2348 {
2349 if (dump_enabled_p ())
2350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2351 "function is not vectorizable.\n");
2352 return false;
2353 }
2354 }
2355
2356 gcc_assert (!gimple_vuse (stmt));
2357
2358 if (slp_node || PURE_SLP_STMT (stmt_info))
2359 ncopies = 1;
2360 else if (modifier == NARROW)
2361 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2362 else
2363 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2364
2365 /* Sanity check: make sure that at least one copy of the vectorized stmt
2366 needs to be generated. */
2367 gcc_assert (ncopies >= 1);
2368
2369 if (!vec_stmt) /* transformation not required. */
2370 {
2371 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2372 if (dump_enabled_p ())
2373 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2374 "\n");
2375 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2376 return true;
2377 }
2378
2379 /** Transform. **/
2380
2381 if (dump_enabled_p ())
2382 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2383
2384 /* Handle def. */
2385 scalar_dest = gimple_call_lhs (stmt);
2386 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2387
2388 prev_stmt_info = NULL;
2389 switch (modifier)
2390 {
2391 case NONE:
2392 for (j = 0; j < ncopies; ++j)
2393 {
2394 /* Build argument list for the vectorized call. */
2395 if (j == 0)
2396 vargs.create (nargs);
2397 else
2398 vargs.truncate (0);
2399
2400 if (slp_node)
2401 {
2402 auto_vec<vec<tree> > vec_defs (nargs);
2403 vec<tree> vec_oprnds0;
2404
2405 for (i = 0; i < nargs; i++)
2406 vargs.quick_push (gimple_call_arg (stmt, i));
2407 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2408 vec_oprnds0 = vec_defs[0];
2409
2410 /* Arguments are ready. Create the new vector stmt. */
2411 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2412 {
2413 size_t k;
2414 for (k = 0; k < nargs; k++)
2415 {
2416 vec<tree> vec_oprndsk = vec_defs[k];
2417 vargs[k] = vec_oprndsk[i];
2418 }
2419 new_stmt = gimple_build_call_vec (fndecl, vargs);
2420 new_temp = make_ssa_name (vec_dest, new_stmt);
2421 gimple_call_set_lhs (new_stmt, new_temp);
2422 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2423 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2424 }
2425
2426 for (i = 0; i < nargs; i++)
2427 {
2428 vec<tree> vec_oprndsi = vec_defs[i];
2429 vec_oprndsi.release ();
2430 }
2431 continue;
2432 }
2433
2434 for (i = 0; i < nargs; i++)
2435 {
2436 op = gimple_call_arg (stmt, i);
2437 if (j == 0)
2438 vec_oprnd0
2439 = vect_get_vec_def_for_operand (op, stmt, NULL);
2440 else
2441 {
2442 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2443 vec_oprnd0
2444 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2445 }
2446
2447 vargs.quick_push (vec_oprnd0);
2448 }
2449
2450 if (gimple_call_internal_p (stmt)
2451 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2452 {
2453 tree *v = XALLOCAVEC (tree, nunits_out);
2454 int k;
2455 for (k = 0; k < nunits_out; ++k)
2456 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2457 tree cst = build_vector (vectype_out, v);
2458 tree new_var
2459 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2460 gimple init_stmt = gimple_build_assign (new_var, cst);
2461 new_temp = make_ssa_name (new_var, init_stmt);
2462 gimple_assign_set_lhs (init_stmt, new_temp);
2463 vect_init_vector_1 (stmt, init_stmt, NULL);
2464 new_temp = make_ssa_name (vec_dest);
2465 new_stmt = gimple_build_assign (new_temp,
2466 gimple_assign_lhs (init_stmt));
2467 }
2468 else
2469 {
2470 new_stmt = gimple_build_call_vec (fndecl, vargs);
2471 new_temp = make_ssa_name (vec_dest, new_stmt);
2472 gimple_call_set_lhs (new_stmt, new_temp);
2473 }
2474 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2475
2476 if (j == 0)
2477 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2478 else
2479 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2480
2481 prev_stmt_info = vinfo_for_stmt (new_stmt);
2482 }
2483
2484 break;
2485
2486 case NARROW:
2487 for (j = 0; j < ncopies; ++j)
2488 {
2489 /* Build argument list for the vectorized call. */
2490 if (j == 0)
2491 vargs.create (nargs * 2);
2492 else
2493 vargs.truncate (0);
2494
2495 if (slp_node)
2496 {
2497 auto_vec<vec<tree> > vec_defs (nargs);
2498 vec<tree> vec_oprnds0;
2499
2500 for (i = 0; i < nargs; i++)
2501 vargs.quick_push (gimple_call_arg (stmt, i));
2502 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2503 vec_oprnds0 = vec_defs[0];
2504
2505 /* Arguments are ready. Create the new vector stmt. */
2506 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2507 {
2508 size_t k;
2509 vargs.truncate (0);
2510 for (k = 0; k < nargs; k++)
2511 {
2512 vec<tree> vec_oprndsk = vec_defs[k];
2513 vargs.quick_push (vec_oprndsk[i]);
2514 vargs.quick_push (vec_oprndsk[i + 1]);
2515 }
2516 new_stmt = gimple_build_call_vec (fndecl, vargs);
2517 new_temp = make_ssa_name (vec_dest, new_stmt);
2518 gimple_call_set_lhs (new_stmt, new_temp);
2519 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2520 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2521 }
2522
2523 for (i = 0; i < nargs; i++)
2524 {
2525 vec<tree> vec_oprndsi = vec_defs[i];
2526 vec_oprndsi.release ();
2527 }
2528 continue;
2529 }
2530
2531 for (i = 0; i < nargs; i++)
2532 {
2533 op = gimple_call_arg (stmt, i);
2534 if (j == 0)
2535 {
2536 vec_oprnd0
2537 = vect_get_vec_def_for_operand (op, stmt, NULL);
2538 vec_oprnd1
2539 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2540 }
2541 else
2542 {
2543 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2544 vec_oprnd0
2545 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2546 vec_oprnd1
2547 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2548 }
2549
2550 vargs.quick_push (vec_oprnd0);
2551 vargs.quick_push (vec_oprnd1);
2552 }
2553
2554 new_stmt = gimple_build_call_vec (fndecl, vargs);
2555 new_temp = make_ssa_name (vec_dest, new_stmt);
2556 gimple_call_set_lhs (new_stmt, new_temp);
2557 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2558
2559 if (j == 0)
2560 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2561 else
2562 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2563
2564 prev_stmt_info = vinfo_for_stmt (new_stmt);
2565 }
2566
2567 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2568
2569 break;
2570
2571 case WIDEN:
2572 /* No current target implements this case. */
2573 return false;
2574 }
2575
2576 vargs.release ();
2577
2578   /* The call in STMT might prevent it from being removed in dce.
2579      However, we cannot remove it here, due to the way the ssa name
2580      it defines is mapped to the new definition.  So just replace the
2581      rhs of the statement with something harmless.  */
2582
2583 if (slp_node)
2584 return true;
2585
2586 type = TREE_TYPE (scalar_dest);
2587 if (is_pattern_stmt_p (stmt_info))
2588 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2589 else
2590 lhs = gimple_call_lhs (stmt);
2591
2592 if (gimple_call_internal_p (stmt)
2593 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2594 {
2595       /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
2596 	 with vf - 1 rather than 0, i.e. the last iteration of the
2597 	 vectorized loop.  */
2598 imm_use_iterator iter;
2599 use_operand_p use_p;
2600 gimple use_stmt;
2601 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2602 {
2603 basic_block use_bb = gimple_bb (use_stmt);
2604 if (use_bb
2605 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2606 {
2607 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2608 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2609 ncopies * nunits_out - 1));
2610 update_stmt (use_stmt);
2611 }
2612 }
2613 }
2614
2615 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2616 set_vinfo_for_stmt (new_stmt, stmt_info);
2617 set_vinfo_for_stmt (stmt, NULL);
2618 STMT_VINFO_STMT (stmt_info) = new_stmt;
2619 gsi_replace (gsi, new_stmt, false);
2620
2621 return true;
2622 }
2623
2624
2625 struct simd_call_arg_info
2626 {
2627   tree vectype;			/* Vector type of the argument, if any.  */
2628   tree op;			/* The operand, or base for linear arguments.  */
2629   enum vect_def_type dt;	/* Definition type of the argument.  */
2630   HOST_WIDE_INT linear_step;	/* Constant step for linear arguments.  */
2631   unsigned int align;		/* Known pointer alignment in bytes.  */
2632   bool simd_lane_linear;	/* True if linear only within a SIMD lane.  */
2633 };
2634
2635 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2636    is linear within a simd lane (but not within the whole loop), note it in
2637    *ARGINFO.  */
2638
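/* Editor's illustration (hypothetical GIMPLE; the SSA names and the array are
   assumptions): a definition chain such as

     _1 = GOMP_SIMD_LANE (simduid.3_2(D));
     _3 = _1 * 4;
     op_4 = &array + _3;

   is recognized below as linear within a single SIMD lane, with base &array
   and linear_step 4.  */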
2639 static void
2640 vect_simd_lane_linear (tree op, struct loop *loop,
2641 struct simd_call_arg_info *arginfo)
2642 {
2643 gimple def_stmt = SSA_NAME_DEF_STMT (op);
2644
2645 if (!is_gimple_assign (def_stmt)
2646 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2647 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2648 return;
2649
2650 tree base = gimple_assign_rhs1 (def_stmt);
2651 HOST_WIDE_INT linear_step = 0;
2652 tree v = gimple_assign_rhs2 (def_stmt);
2653 while (TREE_CODE (v) == SSA_NAME)
2654 {
2655 tree t;
2656 def_stmt = SSA_NAME_DEF_STMT (v);
2657 if (is_gimple_assign (def_stmt))
2658 switch (gimple_assign_rhs_code (def_stmt))
2659 {
2660 case PLUS_EXPR:
2661 t = gimple_assign_rhs2 (def_stmt);
2662 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2663 return;
2664 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2665 v = gimple_assign_rhs1 (def_stmt);
2666 continue;
2667 case MULT_EXPR:
2668 t = gimple_assign_rhs2 (def_stmt);
2669 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2670 return;
2671 linear_step = tree_to_shwi (t);
2672 v = gimple_assign_rhs1 (def_stmt);
2673 continue;
2674 CASE_CONVERT:
2675 t = gimple_assign_rhs1 (def_stmt);
2676 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2677 || (TYPE_PRECISION (TREE_TYPE (v))
2678 < TYPE_PRECISION (TREE_TYPE (t))))
2679 return;
2680 if (!linear_step)
2681 linear_step = 1;
2682 v = t;
2683 continue;
2684 default:
2685 return;
2686 }
2687 else if (is_gimple_call (def_stmt)
2688 && gimple_call_internal_p (def_stmt)
2689 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2690 && loop->simduid
2691 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2692 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2693 == loop->simduid))
2694 {
2695 if (!linear_step)
2696 linear_step = 1;
2697 arginfo->linear_step = linear_step;
2698 arginfo->op = base;
2699 arginfo->simd_lane_linear = true;
2700 return;
2701 }
2702 }
2703 }
2704
2705 /* Function vectorizable_simd_clone_call.
2706
2707 Check if STMT performs a function call that can be vectorized
2708 by calling a simd clone of the function.
2709 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2710    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2711 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2712
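/* Editor's illustration (hedged; the declaration and the mangled clone name
   below are assumptions, not taken from this file): given

     #pragma omp declare simd notinbranch
     float foo (float x);

   the compiler emits SIMD clones such as _ZGVbN4v_foo taking a V4SF
   argument, and this function replaces a scalar call foo (x_5) in a
   vectorized loop with a call to the most suitable such clone.  */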
2713 static bool
2714 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2715 gimple *vec_stmt, slp_tree slp_node)
2716 {
2717 tree vec_dest;
2718 tree scalar_dest;
2719 tree op, type;
2720 tree vec_oprnd0 = NULL_TREE;
2721 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2722 tree vectype;
2723 unsigned int nunits;
2724 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2725 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2726 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2727 tree fndecl, new_temp, def;
2728 gimple def_stmt;
2729 gimple new_stmt = NULL;
2730 int ncopies, j;
2731 vec<simd_call_arg_info> arginfo = vNULL;
2732 vec<tree> vargs = vNULL;
2733 size_t i, nargs;
2734 tree lhs, rtype, ratype;
2735 vec<constructor_elt, va_gc> *ret_ctor_elts;
2736
2737 /* Is STMT a vectorizable call? */
2738 if (!is_gimple_call (stmt))
2739 return false;
2740
2741 fndecl = gimple_call_fndecl (stmt);
2742 if (fndecl == NULL_TREE)
2743 return false;
2744
2745 struct cgraph_node *node = cgraph_node::get (fndecl);
2746 if (node == NULL || node->simd_clones == NULL)
2747 return false;
2748
2749 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2750 return false;
2751
2752 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2753 return false;
2754
2755 if (gimple_call_lhs (stmt)
2756 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2757 return false;
2758
2759 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2760
2761 vectype = STMT_VINFO_VECTYPE (stmt_info);
2762
2763 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2764 return false;
2765
2766 /* FORNOW */
2767 if (slp_node || PURE_SLP_STMT (stmt_info))
2768 return false;
2769
2770 /* Process function arguments. */
2771 nargs = gimple_call_num_args (stmt);
2772
2773 /* Bail out if the function has zero arguments. */
2774 if (nargs == 0)
2775 return false;
2776
2777 arginfo.create (nargs);
2778
2779 for (i = 0; i < nargs; i++)
2780 {
2781 simd_call_arg_info thisarginfo;
2782 affine_iv iv;
2783
2784 thisarginfo.linear_step = 0;
2785 thisarginfo.align = 0;
2786 thisarginfo.op = NULL_TREE;
2787 thisarginfo.simd_lane_linear = false;
2788
2789 op = gimple_call_arg (stmt, i);
2790 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2791 &def_stmt, &def, &thisarginfo.dt,
2792 &thisarginfo.vectype)
2793 || thisarginfo.dt == vect_uninitialized_def)
2794 {
2795 if (dump_enabled_p ())
2796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2797 "use not simple.\n");
2798 arginfo.release ();
2799 return false;
2800 }
2801
2802 if (thisarginfo.dt == vect_constant_def
2803 || thisarginfo.dt == vect_external_def)
2804 gcc_assert (thisarginfo.vectype == NULL_TREE);
2805 else
2806 gcc_assert (thisarginfo.vectype != NULL_TREE);
2807
2808 /* For linear arguments, the analyze phase should have saved
2809 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2810 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2811 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2812 {
2813 gcc_assert (vec_stmt);
2814 thisarginfo.linear_step
2815 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2816 thisarginfo.op
2817 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2818 thisarginfo.simd_lane_linear
2819 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2820 == boolean_true_node);
2821 /* If loop has been peeled for alignment, we need to adjust it. */
2822 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2823 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2824 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2825 {
2826 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2827 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2828 tree opt = TREE_TYPE (thisarginfo.op);
2829 bias = fold_convert (TREE_TYPE (step), bias);
2830 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2831 thisarginfo.op
2832 = fold_build2 (POINTER_TYPE_P (opt)
2833 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2834 thisarginfo.op, bias);
2835 }
2836 }
2837 else if (!vec_stmt
2838 && thisarginfo.dt != vect_constant_def
2839 && thisarginfo.dt != vect_external_def
2840 && loop_vinfo
2841 && TREE_CODE (op) == SSA_NAME
2842 && simple_iv (loop, loop_containing_stmt (stmt), op,
2843 &iv, false)
2844 && tree_fits_shwi_p (iv.step))
2845 {
2846 thisarginfo.linear_step = tree_to_shwi (iv.step);
2847 thisarginfo.op = iv.base;
2848 }
2849 else if ((thisarginfo.dt == vect_constant_def
2850 || thisarginfo.dt == vect_external_def)
2851 && POINTER_TYPE_P (TREE_TYPE (op)))
2852 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2853 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2854 linear too. */
2855 if (POINTER_TYPE_P (TREE_TYPE (op))
2856 && !thisarginfo.linear_step
2857 && !vec_stmt
2858 && thisarginfo.dt != vect_constant_def
2859 && thisarginfo.dt != vect_external_def
2860 && loop_vinfo
2861 && !slp_node
2862 && TREE_CODE (op) == SSA_NAME)
2863 vect_simd_lane_linear (op, loop, &thisarginfo);
2864
2865 arginfo.quick_push (thisarginfo);
2866 }
2867
2868 unsigned int badness = 0;
2869 struct cgraph_node *bestn = NULL;
2870 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2871 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2872 else
2873 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2874 n = n->simdclone->next_clone)
2875 {
2876 unsigned int this_badness = 0;
2877 if (n->simdclone->simdlen
2878 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2879 || n->simdclone->nargs != nargs)
2880 continue;
2881 if (n->simdclone->simdlen
2882 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2883 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2884 - exact_log2 (n->simdclone->simdlen)) * 1024;
2885 if (n->simdclone->inbranch)
2886 this_badness += 2048;
2887 int target_badness = targetm.simd_clone.usable (n);
2888 if (target_badness < 0)
2889 continue;
2890 this_badness += target_badness * 512;
2891 /* FORNOW: Have to add code to add the mask argument. */
2892 if (n->simdclone->inbranch)
2893 continue;
2894 for (i = 0; i < nargs; i++)
2895 {
2896 switch (n->simdclone->args[i].arg_type)
2897 {
2898 case SIMD_CLONE_ARG_TYPE_VECTOR:
2899 if (!useless_type_conversion_p
2900 (n->simdclone->args[i].orig_type,
2901 TREE_TYPE (gimple_call_arg (stmt, i))))
2902 i = -1;
2903 else if (arginfo[i].dt == vect_constant_def
2904 || arginfo[i].dt == vect_external_def
2905 || arginfo[i].linear_step)
2906 this_badness += 64;
2907 break;
2908 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2909 if (arginfo[i].dt != vect_constant_def
2910 && arginfo[i].dt != vect_external_def)
2911 i = -1;
2912 break;
2913 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2914 if (arginfo[i].dt == vect_constant_def
2915 || arginfo[i].dt == vect_external_def
2916 || (arginfo[i].linear_step
2917 != n->simdclone->args[i].linear_step))
2918 i = -1;
2919 break;
2920 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2921 /* FORNOW */
2922 i = -1;
2923 break;
2924 case SIMD_CLONE_ARG_TYPE_MASK:
2925 gcc_unreachable ();
2926 }
2927 if (i == (size_t) -1)
2928 break;
2929 if (n->simdclone->args[i].alignment > arginfo[i].align)
2930 {
2931 i = -1;
2932 break;
2933 }
2934 if (arginfo[i].align)
2935 this_badness += (exact_log2 (arginfo[i].align)
2936 - exact_log2 (n->simdclone->args[i].alignment));
2937 }
2938 if (i == (size_t) -1)
2939 continue;
2940 if (bestn == NULL || this_badness < badness)
2941 {
2942 bestn = n;
2943 badness = this_badness;
2944 }
2945 }
2946
2947 if (bestn == NULL)
2948 {
2949 arginfo.release ();
2950 return false;
2951 }
2952
2953 for (i = 0; i < nargs; i++)
2954 if ((arginfo[i].dt == vect_constant_def
2955 || arginfo[i].dt == vect_external_def)
2956 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2957 {
2958 arginfo[i].vectype
2959 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2960 i)));
2961 if (arginfo[i].vectype == NULL
2962 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2963 > bestn->simdclone->simdlen))
2964 {
2965 arginfo.release ();
2966 return false;
2967 }
2968 }
2969
2970 fndecl = bestn->decl;
2971 nunits = bestn->simdclone->simdlen;
2972 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2973
2974 /* If the function isn't const, only allow it in simd loops where user
2975 has asserted that at least nunits consecutive iterations can be
2976 performed using SIMD instructions. */
2977 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2978 && gimple_vuse (stmt))
2979 {
2980 arginfo.release ();
2981 return false;
2982 }
2983
2984 /* Sanity check: make sure that at least one copy of the vectorized stmt
2985 needs to be generated. */
2986 gcc_assert (ncopies >= 1);
2987
2988 if (!vec_stmt) /* transformation not required. */
2989 {
2990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2991 for (i = 0; i < nargs; i++)
2992 if (bestn->simdclone->args[i].arg_type
2993 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2994 {
2995 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
2996 + 1);
2997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2998 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2999 ? size_type_node : TREE_TYPE (arginfo[i].op);
3000 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3001 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3002 tree sll = arginfo[i].simd_lane_linear
3003 ? boolean_true_node : boolean_false_node;
3004 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3005 }
3006 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3007 if (dump_enabled_p ())
3008 dump_printf_loc (MSG_NOTE, vect_location,
3009 "=== vectorizable_simd_clone_call ===\n");
3010 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3011 arginfo.release ();
3012 return true;
3013 }
3014
3015 /** Transform. **/
3016
3017 if (dump_enabled_p ())
3018 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3019
3020 /* Handle def. */
3021 scalar_dest = gimple_call_lhs (stmt);
3022 vec_dest = NULL_TREE;
3023 rtype = NULL_TREE;
3024 ratype = NULL_TREE;
3025 if (scalar_dest)
3026 {
3027 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3028 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3029 if (TREE_CODE (rtype) == ARRAY_TYPE)
3030 {
3031 ratype = rtype;
3032 rtype = TREE_TYPE (ratype);
3033 }
3034 }
3035
3036 prev_stmt_info = NULL;
3037 for (j = 0; j < ncopies; ++j)
3038 {
3039 /* Build argument list for the vectorized call. */
3040 if (j == 0)
3041 vargs.create (nargs);
3042 else
3043 vargs.truncate (0);
3044
3045 for (i = 0; i < nargs; i++)
3046 {
3047 unsigned int k, l, m, o;
3048 tree atype;
3049 op = gimple_call_arg (stmt, i);
3050 switch (bestn->simdclone->args[i].arg_type)
3051 {
3052 case SIMD_CLONE_ARG_TYPE_VECTOR:
3053 atype = bestn->simdclone->args[i].vector_type;
3054 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3055 for (m = j * o; m < (j + 1) * o; m++)
3056 {
3057 if (TYPE_VECTOR_SUBPARTS (atype)
3058 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3059 {
3060 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3061 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3062 / TYPE_VECTOR_SUBPARTS (atype));
3063 gcc_assert ((k & (k - 1)) == 0);
3064 if (m == 0)
3065 vec_oprnd0
3066 = vect_get_vec_def_for_operand (op, stmt, NULL);
3067 else
3068 {
3069 vec_oprnd0 = arginfo[i].op;
3070 if ((m & (k - 1)) == 0)
3071 vec_oprnd0
3072 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3073 vec_oprnd0);
3074 }
3075 arginfo[i].op = vec_oprnd0;
3076 vec_oprnd0
3077 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3078 size_int (prec),
3079 bitsize_int ((m & (k - 1)) * prec));
3080 new_stmt
3081 = gimple_build_assign (make_ssa_name (atype),
3082 vec_oprnd0);
3083 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3084 vargs.safe_push (gimple_assign_lhs (new_stmt));
3085 }
3086 else
3087 {
3088 k = (TYPE_VECTOR_SUBPARTS (atype)
3089 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3090 gcc_assert ((k & (k - 1)) == 0);
3091 vec<constructor_elt, va_gc> *ctor_elts;
3092 if (k != 1)
3093 vec_alloc (ctor_elts, k);
3094 else
3095 ctor_elts = NULL;
3096 for (l = 0; l < k; l++)
3097 {
3098 if (m == 0 && l == 0)
3099 vec_oprnd0
3100 = vect_get_vec_def_for_operand (op, stmt, NULL);
3101 else
3102 vec_oprnd0
3103 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3104 arginfo[i].op);
3105 arginfo[i].op = vec_oprnd0;
3106 if (k == 1)
3107 break;
3108 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3109 vec_oprnd0);
3110 }
3111 if (k == 1)
3112 vargs.safe_push (vec_oprnd0);
3113 else
3114 {
3115 vec_oprnd0 = build_constructor (atype, ctor_elts);
3116 new_stmt
3117 = gimple_build_assign (make_ssa_name (atype),
3118 vec_oprnd0);
3119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3120 vargs.safe_push (gimple_assign_lhs (new_stmt));
3121 }
3122 }
3123 }
3124 break;
3125 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3126 vargs.safe_push (op);
3127 break;
3128 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3129 if (j == 0)
3130 {
3131 gimple_seq stmts;
3132 arginfo[i].op
3133 = force_gimple_operand (arginfo[i].op, &stmts, true,
3134 NULL_TREE);
3135 if (stmts != NULL)
3136 {
3137 basic_block new_bb;
3138 edge pe = loop_preheader_edge (loop);
3139 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3140 gcc_assert (!new_bb);
3141 }
3142 if (arginfo[i].simd_lane_linear)
3143 {
3144 vargs.safe_push (arginfo[i].op);
3145 break;
3146 }
3147 tree phi_res = copy_ssa_name (op);
3148 gphi *new_phi = create_phi_node (phi_res, loop->header);
3149 set_vinfo_for_stmt (new_phi,
3150 new_stmt_vec_info (new_phi, loop_vinfo,
3151 NULL));
3152 add_phi_arg (new_phi, arginfo[i].op,
3153 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3154 enum tree_code code
3155 = POINTER_TYPE_P (TREE_TYPE (op))
3156 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3157 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3158 ? sizetype : TREE_TYPE (op);
3159 widest_int cst
3160 = wi::mul (bestn->simdclone->args[i].linear_step,
3161 ncopies * nunits);
3162 tree tcst = wide_int_to_tree (type, cst);
3163 tree phi_arg = copy_ssa_name (op);
3164 new_stmt
3165 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3166 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3167 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3168 set_vinfo_for_stmt (new_stmt,
3169 new_stmt_vec_info (new_stmt, loop_vinfo,
3170 NULL));
3171 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3172 UNKNOWN_LOCATION);
3173 arginfo[i].op = phi_res;
3174 vargs.safe_push (phi_res);
3175 }
3176 else
3177 {
3178 enum tree_code code
3179 = POINTER_TYPE_P (TREE_TYPE (op))
3180 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3181 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3182 ? sizetype : TREE_TYPE (op);
3183 widest_int cst
3184 = wi::mul (bestn->simdclone->args[i].linear_step,
3185 j * nunits);
3186 tree tcst = wide_int_to_tree (type, cst);
3187 new_temp = make_ssa_name (TREE_TYPE (op));
3188 new_stmt = gimple_build_assign (new_temp, code,
3189 arginfo[i].op, tcst);
3190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3191 vargs.safe_push (new_temp);
3192 }
3193 break;
3194 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3195 default:
3196 gcc_unreachable ();
3197 }
3198 }
3199
3200 new_stmt = gimple_build_call_vec (fndecl, vargs);
3201 if (vec_dest)
3202 {
3203 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3204 if (ratype)
3205 new_temp = create_tmp_var (ratype);
3206 else if (TYPE_VECTOR_SUBPARTS (vectype)
3207 == TYPE_VECTOR_SUBPARTS (rtype))
3208 new_temp = make_ssa_name (vec_dest, new_stmt);
3209 else
3210 new_temp = make_ssa_name (rtype, new_stmt);
3211 gimple_call_set_lhs (new_stmt, new_temp);
3212 }
3213 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3214
3215 if (vec_dest)
3216 {
3217 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3218 {
3219 unsigned int k, l;
3220 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3221 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3222 gcc_assert ((k & (k - 1)) == 0);
3223 for (l = 0; l < k; l++)
3224 {
3225 tree t;
3226 if (ratype)
3227 {
3228 t = build_fold_addr_expr (new_temp);
3229 t = build2 (MEM_REF, vectype, t,
3230 build_int_cst (TREE_TYPE (t),
3231 l * prec / BITS_PER_UNIT));
3232 }
3233 else
3234 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3235 size_int (prec), bitsize_int (l * prec));
3236 new_stmt
3237 = gimple_build_assign (make_ssa_name (vectype), t);
3238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3239 if (j == 0 && l == 0)
3240 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3241 else
3242 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3243
3244 prev_stmt_info = vinfo_for_stmt (new_stmt);
3245 }
3246
3247 if (ratype)
3248 {
3249 tree clobber = build_constructor (ratype, NULL);
3250 TREE_THIS_VOLATILE (clobber) = 1;
3251 new_stmt = gimple_build_assign (new_temp, clobber);
3252 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3253 }
3254 continue;
3255 }
3256 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3257 {
3258 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3259 / TYPE_VECTOR_SUBPARTS (rtype));
3260 gcc_assert ((k & (k - 1)) == 0);
3261 if ((j & (k - 1)) == 0)
3262 vec_alloc (ret_ctor_elts, k);
3263 if (ratype)
3264 {
3265 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3266 for (m = 0; m < o; m++)
3267 {
3268 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3269 size_int (m), NULL_TREE, NULL_TREE);
3270 new_stmt
3271 = gimple_build_assign (make_ssa_name (rtype), tem);
3272 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3273 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3274 gimple_assign_lhs (new_stmt));
3275 }
3276 tree clobber = build_constructor (ratype, NULL);
3277 TREE_THIS_VOLATILE (clobber) = 1;
3278 new_stmt = gimple_build_assign (new_temp, clobber);
3279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3280 }
3281 else
3282 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3283 if ((j & (k - 1)) != k - 1)
3284 continue;
3285 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3286 new_stmt
3287 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3288 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3289
3290 if ((unsigned) j == k - 1)
3291 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3292 else
3293 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3294
3295 prev_stmt_info = vinfo_for_stmt (new_stmt);
3296 continue;
3297 }
3298 else if (ratype)
3299 {
3300 tree t = build_fold_addr_expr (new_temp);
3301 t = build2 (MEM_REF, vectype, t,
3302 build_int_cst (TREE_TYPE (t), 0));
3303 new_stmt
3304 = gimple_build_assign (make_ssa_name (vec_dest), t);
3305 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3306 tree clobber = build_constructor (ratype, NULL);
3307 TREE_THIS_VOLATILE (clobber) = 1;
3308 vect_finish_stmt_generation (stmt,
3309 gimple_build_assign (new_temp,
3310 clobber), gsi);
3311 }
3312 }
3313
3314 if (j == 0)
3315 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3316 else
3317 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3318
3319 prev_stmt_info = vinfo_for_stmt (new_stmt);
3320 }
3321
3322 vargs.release ();
3323
3324   /* The call in STMT might prevent it from being removed in dce.
3325      However, we cannot remove it here, due to the way the ssa name
3326      it defines is mapped to the new definition.  So just replace the
3327      rhs of the statement with something harmless.  */
3328
3329 if (slp_node)
3330 return true;
3331
3332 if (scalar_dest)
3333 {
3334 type = TREE_TYPE (scalar_dest);
3335 if (is_pattern_stmt_p (stmt_info))
3336 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3337 else
3338 lhs = gimple_call_lhs (stmt);
3339 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3340 }
3341 else
3342 new_stmt = gimple_build_nop ();
3343 set_vinfo_for_stmt (new_stmt, stmt_info);
3344 set_vinfo_for_stmt (stmt, NULL);
3345 STMT_VINFO_STMT (stmt_info) = new_stmt;
3346 gsi_replace (gsi, new_stmt, true);
3347 unlink_stmt_vdef (stmt);
3348
3349 return true;
3350 }
3351
3352
3353 /* Function vect_gen_widened_results_half
3354
3355    Create a vector stmt whose code, number of arguments, and result
3356    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3357    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
3358 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3359 needs to be created (DECL is a function-decl of a target-builtin).
3360 STMT is the original scalar stmt that we are vectorizing. */
3361
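/* Editor's note (illustrative, with assumed types): for a widening multiply
   of two V8HI operands into V4SI results, the caller invokes this helper
   twice, once with CODE == VEC_WIDEN_MULT_LO_EXPR and once with
   VEC_WIDEN_MULT_HI_EXPR, each call producing one V4SI half of the widened
   product.  */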
3362 static gimple
3363 vect_gen_widened_results_half (enum tree_code code,
3364 tree decl,
3365 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3366 tree vec_dest, gimple_stmt_iterator *gsi,
3367 gimple stmt)
3368 {
3369 gimple new_stmt;
3370 tree new_temp;
3371
3372 /* Generate half of the widened result: */
3373 if (code == CALL_EXPR)
3374 {
3375 /* Target specific support */
3376 if (op_type == binary_op)
3377 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3378 else
3379 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3380 new_temp = make_ssa_name (vec_dest, new_stmt);
3381 gimple_call_set_lhs (new_stmt, new_temp);
3382 }
3383 else
3384 {
3385 /* Generic support */
3386 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3387 if (op_type != binary_op)
3388 vec_oprnd1 = NULL;
3389 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3390 new_temp = make_ssa_name (vec_dest, new_stmt);
3391 gimple_assign_set_lhs (new_stmt, new_temp);
3392 }
3393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3394
3395 return new_stmt;
3396 }
3397
3398
3399 /* Get vectorized definitions for loop-based vectorization. For the first
3400 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3401 scalar operand), and for the rest we get a copy with
3402 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3403 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3404 The vectors are collected into VEC_OPRNDS. */
3405
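/* Editor's note: each invocation pushes two vector defs, and the recursion
   below repeats that MULTI_STEP_CVT more times, so e.g. a two-step
   conversion (MULTI_STEP_CVT == 1) collects four vector operands in
   VEC_OPRNDS.  */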
3406 static void
3407 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3408 vec<tree> *vec_oprnds, int multi_step_cvt)
3409 {
3410 tree vec_oprnd;
3411
3412   /* Get the first vector operand.  */
3413   /* All the vector operands except the very first one (which is the scalar
3414      OPRND) are stmt copies.  */
3415 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3416 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3417 else
3418 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3419
3420 vec_oprnds->quick_push (vec_oprnd);
3421
3422 /* Get second vector operand. */
3423 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3424 vec_oprnds->quick_push (vec_oprnd);
3425
3426 *oprnd = vec_oprnd;
3427
3428 /* For conversion in multiple steps, continue to get operands
3429 recursively. */
3430 if (multi_step_cvt)
3431 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3432 }
3433
3434
3435 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3436 For multi-step conversions store the resulting vectors and call the function
3437 recursively. */
3438
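/* Editor's illustration (types assumed): narrowing int to char with V4SI
   inputs proceeds in two VEC_PACK_TRUNC_EXPR steps, first packing pairs of
   V4SI vectors into V8HI vectors and then pairs of V8HI vectors into a
   V16QI result; the recursion below implements exactly this kind of
   multi-step combination of the entries of VEC_OPRNDS.  */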
3439 static void
3440 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3441 int multi_step_cvt, gimple stmt,
3442 vec<tree> vec_dsts,
3443 gimple_stmt_iterator *gsi,
3444 slp_tree slp_node, enum tree_code code,
3445 stmt_vec_info *prev_stmt_info)
3446 {
3447 unsigned int i;
3448 tree vop0, vop1, new_tmp, vec_dest;
3449 gimple new_stmt;
3450 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3451
3452 vec_dest = vec_dsts.pop ();
3453
3454 for (i = 0; i < vec_oprnds->length (); i += 2)
3455 {
3456 /* Create demotion operation. */
3457 vop0 = (*vec_oprnds)[i];
3458 vop1 = (*vec_oprnds)[i + 1];
3459 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3460 new_tmp = make_ssa_name (vec_dest, new_stmt);
3461 gimple_assign_set_lhs (new_stmt, new_tmp);
3462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3463
3464 if (multi_step_cvt)
3465 /* Store the resulting vector for next recursive call. */
3466 (*vec_oprnds)[i/2] = new_tmp;
3467 else
3468 {
3469 /* This is the last step of the conversion sequence. Store the
3470 vectors in SLP_NODE or in vector info of the scalar statement
3471 (or in STMT_VINFO_RELATED_STMT chain). */
3472 if (slp_node)
3473 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3474 else
3475 {
3476 if (!*prev_stmt_info)
3477 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3478 else
3479 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3480
3481 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3482 }
3483 }
3484 }
3485
3486   /* For multi-step demotion operations we first generate demotion operations
3487      from the source type to the intermediate types, and then combine the
3488      results (stored in VEC_OPRNDS) in a demotion operation to the destination
3489      type.  */
3490 if (multi_step_cvt)
3491 {
3492 /* At each level of recursion we have half of the operands we had at the
3493 previous level. */
3494 vec_oprnds->truncate ((i+1)/2);
3495 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3496 stmt, vec_dsts, gsi, slp_node,
3497 VEC_PACK_TRUNC_EXPR,
3498 prev_stmt_info);
3499 }
3500
3501 vec_dsts.quick_push (vec_dest);
3502 }
3503
3504
3505 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3506 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3507 the resulting vectors and call the function recursively. */
3508
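/* Editor's note (illustrative types assumed): when promoting V16QI operands
   to V8HI results, each input vector in VEC_OPRNDS0 yields two output
   vectors here, one from the LO and one from the HI variant of the widening
   operation, so VEC_OPRNDS0 doubles in length per step.  */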
3509 static void
3510 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3511 vec<tree> *vec_oprnds1,
3512 gimple stmt, tree vec_dest,
3513 gimple_stmt_iterator *gsi,
3514 enum tree_code code1,
3515 enum tree_code code2, tree decl1,
3516 tree decl2, int op_type)
3517 {
3518 int i;
3519 tree vop0, vop1, new_tmp1, new_tmp2;
3520 gimple new_stmt1, new_stmt2;
3521 vec<tree> vec_tmp = vNULL;
3522
3523 vec_tmp.create (vec_oprnds0->length () * 2);
3524 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3525 {
3526 if (op_type == binary_op)
3527 vop1 = (*vec_oprnds1)[i];
3528 else
3529 vop1 = NULL_TREE;
3530
3531 /* Generate the two halves of promotion operation. */
3532 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3533 op_type, vec_dest, gsi, stmt);
3534 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3535 op_type, vec_dest, gsi, stmt);
3536 if (is_gimple_call (new_stmt1))
3537 {
3538 new_tmp1 = gimple_call_lhs (new_stmt1);
3539 new_tmp2 = gimple_call_lhs (new_stmt2);
3540 }
3541 else
3542 {
3543 new_tmp1 = gimple_assign_lhs (new_stmt1);
3544 new_tmp2 = gimple_assign_lhs (new_stmt2);
3545 }
3546
3547 /* Store the results for the next step. */
3548 vec_tmp.quick_push (new_tmp1);
3549 vec_tmp.quick_push (new_tmp2);
3550 }
3551
3552 vec_oprnds0->release ();
3553 *vec_oprnds0 = vec_tmp;
3554 }
3555
3556
3557 /* Check if STMT performs a conversion operation that can be vectorized.
3558 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3559 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3560 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3561
3562 static bool
3563 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3564 gimple *vec_stmt, slp_tree slp_node)
3565 {
3566 tree vec_dest;
3567 tree scalar_dest;
3568 tree op0, op1 = NULL_TREE;
3569 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3570 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3571 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3572 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3573 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3574 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3575 tree new_temp;
3576 tree def;
3577 gimple def_stmt;
3578 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3579 gimple new_stmt = NULL;
3580 stmt_vec_info prev_stmt_info;
3581 int nunits_in;
3582 int nunits_out;
3583 tree vectype_out, vectype_in;
3584 int ncopies, i, j;
3585 tree lhs_type, rhs_type;
3586 enum { NARROW, NONE, WIDEN } modifier;
3587 vec<tree> vec_oprnds0 = vNULL;
3588 vec<tree> vec_oprnds1 = vNULL;
3589 tree vop0;
3590 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3591 int multi_step_cvt = 0;
3592 vec<tree> vec_dsts = vNULL;
3593 vec<tree> interm_types = vNULL;
3594 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3595 int op_type;
3596 machine_mode rhs_mode;
3597 unsigned short fltsz;
3598
3599 /* Is STMT a vectorizable conversion? */
3600
3601 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3602 return false;
3603
3604 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3605 return false;
3606
3607 if (!is_gimple_assign (stmt))
3608 return false;
3609
3610 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3611 return false;
3612
3613 code = gimple_assign_rhs_code (stmt);
3614 if (!CONVERT_EXPR_CODE_P (code)
3615 && code != FIX_TRUNC_EXPR
3616 && code != FLOAT_EXPR
3617 && code != WIDEN_MULT_EXPR
3618 && code != WIDEN_LSHIFT_EXPR)
3619 return false;
3620
3621 op_type = TREE_CODE_LENGTH (code);
3622
3623 /* Check types of lhs and rhs. */
3624 scalar_dest = gimple_assign_lhs (stmt);
3625 lhs_type = TREE_TYPE (scalar_dest);
3626 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3627
3628 op0 = gimple_assign_rhs1 (stmt);
3629 rhs_type = TREE_TYPE (op0);
3630
3631 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3632 && !((INTEGRAL_TYPE_P (lhs_type)
3633 && INTEGRAL_TYPE_P (rhs_type))
3634 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3635 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3636 return false;
3637
3638 if ((INTEGRAL_TYPE_P (lhs_type)
3639 && (TYPE_PRECISION (lhs_type)
3640 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3641 || (INTEGRAL_TYPE_P (rhs_type)
3642 && (TYPE_PRECISION (rhs_type)
3643 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3644 {
3645 if (dump_enabled_p ())
3646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3647 "type conversion to/from bit-precision unsupported."
3648 "\n");
3649 return false;
3650 }
3651
3652 /* Check the operands of the operation. */
3653 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3654 &def_stmt, &def, &dt[0], &vectype_in))
3655 {
3656 if (dump_enabled_p ())
3657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3658 "use not simple.\n");
3659 return false;
3660 }
3661 if (op_type == binary_op)
3662 {
3663 bool ok;
3664
3665 op1 = gimple_assign_rhs2 (stmt);
3666 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3667 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3668 OP1. */
3669 if (CONSTANT_CLASS_P (op0))
3670 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3671 &def_stmt, &def, &dt[1], &vectype_in);
3672 else
3673 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3674 &def, &dt[1]);
3675
3676 if (!ok)
3677 {
3678 if (dump_enabled_p ())
3679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3680 "use not simple.\n");
3681 return false;
3682 }
3683 }
3684
3685 /* If op0 is an external or constant def, use a vector type of
3686 the same size as the output vector type. */
3687 if (!vectype_in)
3688 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3689 if (vec_stmt)
3690 gcc_assert (vectype_in);
3691 if (!vectype_in)
3692 {
3693 if (dump_enabled_p ())
3694 {
3695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3696 "no vectype for scalar type ");
3697 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3698 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3699 }
3700
3701 return false;
3702 }
3703
3704 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3705 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3706 if (nunits_in < nunits_out)
3707 modifier = NARROW;
3708 else if (nunits_out == nunits_in)
3709 modifier = NONE;
3710 else
3711 modifier = WIDEN;
3712
3713 /* Multiple types in SLP are handled by creating the appropriate number of
3714 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3715 case of SLP. */
3716 if (slp_node || PURE_SLP_STMT (stmt_info))
3717 ncopies = 1;
3718 else if (modifier == NARROW)
3719 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3720 else
3721 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3722
3723 /* Sanity check: make sure that at least one copy of the vectorized stmt
3724 needs to be generated. */
3725 gcc_assert (ncopies >= 1);
3726
3727 /* Supportable by target? */
3728 switch (modifier)
3729 {
3730 case NONE:
3731 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3732 return false;
3733 if (supportable_convert_operation (code, vectype_out, vectype_in,
3734 &decl1, &code1))
3735 break;
3736 /* FALLTHRU */
3737 unsupported:
3738 if (dump_enabled_p ())
3739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3740 "conversion not supported by target.\n");
3741 return false;
3742
3743 case WIDEN:
3744 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3745 &code1, &code2, &multi_step_cvt,
3746 &interm_types))
3747 {
3748 /* Binary widening operations can only be supported directly by the
3749 architecture. */
3750 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3751 break;
3752 }
3753
3754 if (code != FLOAT_EXPR
3755 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3756 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3757 goto unsupported;
3758
3759 rhs_mode = TYPE_MODE (rhs_type);
3760 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3761 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3762 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3763 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3764 {
3765 cvt_type
3766 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3767 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3768 if (cvt_type == NULL_TREE)
3769 goto unsupported;
3770
3771 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3772 {
3773 if (!supportable_convert_operation (code, vectype_out,
3774 cvt_type, &decl1, &codecvt1))
3775 goto unsupported;
3776 }
3777 else if (!supportable_widening_operation (code, stmt, vectype_out,
3778 cvt_type, &codecvt1,
3779 &codecvt2, &multi_step_cvt,
3780 &interm_types))
3781 continue;
3782 else
3783 gcc_assert (multi_step_cvt == 0);
3784
3785 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3786 vectype_in, &code1, &code2,
3787 &multi_step_cvt, &interm_types))
3788 break;
3789 }
3790
3791 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3792 goto unsupported;
3793
3794 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3795 codecvt2 = ERROR_MARK;
3796 else
3797 {
3798 multi_step_cvt++;
3799 interm_types.safe_push (cvt_type);
3800 cvt_type = NULL_TREE;
3801 }
3802 break;
3803
3804 case NARROW:
3805 gcc_assert (op_type == unary_op);
3806 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3807 &code1, &multi_step_cvt,
3808 &interm_types))
3809 break;
3810
3811 if (code != FIX_TRUNC_EXPR
3812 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3813 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3814 goto unsupported;
3815
3816 rhs_mode = TYPE_MODE (rhs_type);
3817 cvt_type
3818 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3819 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3820 if (cvt_type == NULL_TREE)
3821 goto unsupported;
3822 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3823 &decl1, &codecvt1))
3824 goto unsupported;
3825 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3826 &code1, &multi_step_cvt,
3827 &interm_types))
3828 break;
3829 goto unsupported;
3830
3831 default:
3832 gcc_unreachable ();
3833 }
3834
3835 if (!vec_stmt) /* transformation not required. */
3836 {
3837 if (dump_enabled_p ())
3838 dump_printf_loc (MSG_NOTE, vect_location,
3839 "=== vectorizable_conversion ===\n");
3840 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3841 {
3842 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3843 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3844 }
3845 else if (modifier == NARROW)
3846 {
3847 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3848 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3849 }
3850 else
3851 {
3852 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3853 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3854 }
3855 interm_types.release ();
3856 return true;
3857 }
3858
3859 /** Transform. **/
3860 if (dump_enabled_p ())
3861 dump_printf_loc (MSG_NOTE, vect_location,
3862 "transform conversion. ncopies = %d.\n", ncopies);
3863
3864 if (op_type == binary_op)
3865 {
3866 if (CONSTANT_CLASS_P (op0))
3867 op0 = fold_convert (TREE_TYPE (op1), op0);
3868 else if (CONSTANT_CLASS_P (op1))
3869 op1 = fold_convert (TREE_TYPE (op0), op1);
3870 }
3871
3872 /* In case of multi-step conversion, we first generate conversion operations
3873 to the intermediate types, and then from those types to the final one.
3874 We create vector destinations for the intermediate types (TYPES) received
3875 from supportable_*_operation, and store them in the correct order
3876 for future use in vect_create_vectorized_*_stmts (). */
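/* A worked ordering example (hypothetical types): for a two-step
   char -> short -> int widening, VEC_DSTS ends up holding the final
   int-vector destination at index 0 and the intermediate short-vector
   destination at index 1; the transform loops below walk from the
   highest index down, so the char -> short step is emitted first.  */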
3877 vec_dsts.create (multi_step_cvt + 1);
3878 vec_dest = vect_create_destination_var (scalar_dest,
3879 (cvt_type && modifier == WIDEN)
3880 ? cvt_type : vectype_out);
3881 vec_dsts.quick_push (vec_dest);
3882
3883 if (multi_step_cvt)
3884 {
3885 for (i = interm_types.length () - 1;
3886 interm_types.iterate (i, &intermediate_type); i--)
3887 {
3888 vec_dest = vect_create_destination_var (scalar_dest,
3889 intermediate_type);
3890 vec_dsts.quick_push (vec_dest);
3891 }
3892 }
3893
3894 if (cvt_type)
3895 vec_dest = vect_create_destination_var (scalar_dest,
3896 modifier == WIDEN
3897 ? vectype_out : cvt_type);
3898
3899 if (!slp_node)
3900 {
3901 if (modifier == WIDEN)
3902 {
3903 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3904 if (op_type == binary_op)
3905 vec_oprnds1.create (1);
3906 }
3907 else if (modifier == NARROW)
3908 vec_oprnds0.create (
3909 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3910 }
3911 else if (code == WIDEN_LSHIFT_EXPR)
3912 vec_oprnds1.create (slp_node->vec_stmts_size);
3913
3914 last_oprnd = op0;
3915 prev_stmt_info = NULL;
3916 switch (modifier)
3917 {
3918 case NONE:
3919 for (j = 0; j < ncopies; j++)
3920 {
3921 if (j == 0)
3922 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3923 -1);
3924 else
3925 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3926
3927 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3928 {
3929 /* Arguments are ready, create the new vector stmt. */
3930 if (code1 == CALL_EXPR)
3931 {
3932 new_stmt = gimple_build_call (decl1, 1, vop0);
3933 new_temp = make_ssa_name (vec_dest, new_stmt);
3934 gimple_call_set_lhs (new_stmt, new_temp);
3935 }
3936 else
3937 {
3938 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3939 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3940 new_temp = make_ssa_name (vec_dest, new_stmt);
3941 gimple_assign_set_lhs (new_stmt, new_temp);
3942 }
3943
3944 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3945 if (slp_node)
3946 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3947 else
3948 {
3949 if (!prev_stmt_info)
3950 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3951 else
3952 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3953 prev_stmt_info = vinfo_for_stmt (new_stmt);
3954 }
3955 }
3956 }
3957 break;
3958
3959 case WIDEN:
3960 /* In case the vectorization factor (VF) is bigger than the number
3961 of elements that we can fit in a vectype (nunits), we have to
3962 generate more than one vector stmt - i.e., we need to "unroll"
3963 the vector stmt by a factor VF/nunits. */
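/* A hedged arithmetic example: with VF == 8, V8HI inputs
   (nunits_in == 8) and V4SI outputs (nunits_out == 4), NCOPIES is
   VF / nunits_in == 1, and the single copy still yields the 8 output
   lanes because each promotion step produces a LO and a HI result.  */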
3964 for (j = 0; j < ncopies; j++)
3965 {
3966 /* Handle uses. */
3967 if (j == 0)
3968 {
3969 if (slp_node)
3970 {
3971 if (code == WIDEN_LSHIFT_EXPR)
3972 {
3973 unsigned int k;
3974
3975 vec_oprnd1 = op1;
3976 /* Store vec_oprnd1 for every vector stmt to be created
3977 for SLP_NODE. We check during the analysis that all
3978 the shift arguments are the same. */
3979 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3980 vec_oprnds1.quick_push (vec_oprnd1);
3981
3982 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3983 slp_node, -1);
3984 }
3985 else
3986 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3987 &vec_oprnds1, slp_node, -1);
3988 }
3989 else
3990 {
3991 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3992 vec_oprnds0.quick_push (vec_oprnd0);
3993 if (op_type == binary_op)
3994 {
3995 if (code == WIDEN_LSHIFT_EXPR)
3996 vec_oprnd1 = op1;
3997 else
3998 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3999 NULL);
4000 vec_oprnds1.quick_push (vec_oprnd1);
4001 }
4002 }
4003 }
4004 else
4005 {
4006 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4007 vec_oprnds0.truncate (0);
4008 vec_oprnds0.quick_push (vec_oprnd0);
4009 if (op_type == binary_op)
4010 {
4011 if (code == WIDEN_LSHIFT_EXPR)
4012 vec_oprnd1 = op1;
4013 else
4014 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4015 vec_oprnd1);
4016 vec_oprnds1.truncate (0);
4017 vec_oprnds1.quick_push (vec_oprnd1);
4018 }
4019 }
4020
4021 /* Arguments are ready. Create the new vector stmts. */
4022 for (i = multi_step_cvt; i >= 0; i--)
4023 {
4024 tree this_dest = vec_dsts[i];
4025 enum tree_code c1 = code1, c2 = code2;
4026 if (i == 0 && codecvt2 != ERROR_MARK)
4027 {
4028 c1 = codecvt1;
4029 c2 = codecvt2;
4030 }
4031 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4032 &vec_oprnds1,
4033 stmt, this_dest, gsi,
4034 c1, c2, decl1, decl2,
4035 op_type);
4036 }
4037
4038 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4039 {
4040 if (cvt_type)
4041 {
4042 if (codecvt1 == CALL_EXPR)
4043 {
4044 new_stmt = gimple_build_call (decl1, 1, vop0);
4045 new_temp = make_ssa_name (vec_dest, new_stmt);
4046 gimple_call_set_lhs (new_stmt, new_temp);
4047 }
4048 else
4049 {
4050 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4051 new_temp = make_ssa_name (vec_dest);
4052 new_stmt = gimple_build_assign (new_temp, codecvt1,
4053 vop0);
4054 }
4055
4056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4057 }
4058 else
4059 new_stmt = SSA_NAME_DEF_STMT (vop0);
4060
4061 if (slp_node)
4062 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4063 else
4064 {
4065 if (!prev_stmt_info)
4066 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4067 else
4068 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4069 prev_stmt_info = vinfo_for_stmt (new_stmt);
4070 }
4071 }
4072 }
4073
4074 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4075 break;
4076
4077 case NARROW:
4078 /* In case the vectorization factor (VF) is bigger than the number
4079 of elements that we can fit in a vectype (nunits), we have to
4080 generate more than one vector stmt - i.e., we need to "unroll"
4081 the vector stmt by a factor VF/nunits. */
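/* A hedged arithmetic example: with VF == 8, V4SI inputs
   (nunits_in == 4) and V8HI outputs (nunits_out == 8), NCOPIES is
   VF / nunits_out == 1, and each copy packs two input vectors into a
   single output vector.  */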
4082 for (j = 0; j < ncopies; j++)
4083 {
4084 /* Handle uses. */
4085 if (slp_node)
4086 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4087 slp_node, -1);
4088 else
4089 {
4090 vec_oprnds0.truncate (0);
4091 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4092 vect_pow2 (multi_step_cvt) - 1);
4093 }
4094
4095 /* Arguments are ready. Create the new vector stmts. */
4096 if (cvt_type)
4097 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4098 {
4099 if (codecvt1 == CALL_EXPR)
4100 {
4101 new_stmt = gimple_build_call (decl1, 1, vop0);
4102 new_temp = make_ssa_name (vec_dest, new_stmt);
4103 gimple_call_set_lhs (new_stmt, new_temp);
4104 }
4105 else
4106 {
4107 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4108 new_temp = make_ssa_name (vec_dest);
4109 new_stmt = gimple_build_assign (new_temp, codecvt1,
4110 vop0);
4111 }
4112
4113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4114 vec_oprnds0[i] = new_temp;
4115 }
4116
4117 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4118 stmt, vec_dsts, gsi,
4119 slp_node, code1,
4120 &prev_stmt_info);
4121 }
4122
4123 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4124 break;
4125 }
4126
4127 vec_oprnds0.release ();
4128 vec_oprnds1.release ();
4129 vec_dsts.release ();
4130 interm_types.release ();
4131
4132 return true;
4133 }
4134
4135
4136 /* Function vectorizable_assignment.
4137
4138 Check if STMT performs an assignment (copy) that can be vectorized.
4139 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4140 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4141 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
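/* For instance (an illustrative example only), the same-width cast in

     void
     f (int *restrict a, unsigned int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = (int) b[i];
     }

   is a NOP_EXPR copy that changes neither the lane count nor the
   vector size, which is exactly what this routine accepts.  */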
4142
4143 static bool
4144 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4145 gimple *vec_stmt, slp_tree slp_node)
4146 {
4147 tree vec_dest;
4148 tree scalar_dest;
4149 tree op;
4150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4151 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4152 tree new_temp;
4153 tree def;
4154 gimple def_stmt;
4155 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4156 int ncopies;
4157 int i, j;
4158 vec<tree> vec_oprnds = vNULL;
4159 tree vop;
4160 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4161 gimple new_stmt = NULL;
4162 stmt_vec_info prev_stmt_info = NULL;
4163 enum tree_code code;
4164 tree vectype_in;
4165
4166 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4167 return false;
4168
4169 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4170 return false;
4171
4172 /* Is vectorizable assignment? */
4173 if (!is_gimple_assign (stmt))
4174 return false;
4175
4176 scalar_dest = gimple_assign_lhs (stmt);
4177 if (TREE_CODE (scalar_dest) != SSA_NAME)
4178 return false;
4179
4180 code = gimple_assign_rhs_code (stmt);
4181 if (gimple_assign_single_p (stmt)
4182 || code == PAREN_EXPR
4183 || CONVERT_EXPR_CODE_P (code))
4184 op = gimple_assign_rhs1 (stmt);
4185 else
4186 return false;
4187
4188 if (code == VIEW_CONVERT_EXPR)
4189 op = TREE_OPERAND (op, 0);
4190
4191 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4192 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4193
4194 /* Multiple types in SLP are handled by creating the appropriate number of
4195 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4196 case of SLP. */
4197 if (slp_node || PURE_SLP_STMT (stmt_info))
4198 ncopies = 1;
4199 else
4200 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4201
4202 gcc_assert (ncopies >= 1);
4203
4204 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4205 &def_stmt, &def, &dt[0], &vectype_in))
4206 {
4207 if (dump_enabled_p ())
4208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4209 "use not simple.\n");
4210 return false;
4211 }
4212
4213 /* We can handle NOP_EXPR conversions that do not change the number
4214 of elements or the vector size. */
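/* For example (hedged illustration): a VIEW_CONVERT_EXPR between V4SI
   and V4SF is accepted here since both are 16 bytes with four lanes,
   whereas a short -> int NOP_EXPR, which changes the lane count, is
   rejected and left to vectorizable_conversion.  */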
4215 if ((CONVERT_EXPR_CODE_P (code)
4216 || code == VIEW_CONVERT_EXPR)
4217 && (!vectype_in
4218 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4219 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4220 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4221 return false;
4222
4223 /* We do not handle bit-precision changes. */
4224 if ((CONVERT_EXPR_CODE_P (code)
4225 || code == VIEW_CONVERT_EXPR)
4226 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4227 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4228 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4229 || ((TYPE_PRECISION (TREE_TYPE (op))
4230 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4231 /* But a conversion that does not change the bit-pattern is ok. */
4232 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4233 > TYPE_PRECISION (TREE_TYPE (op)))
4234 && TYPE_UNSIGNED (TREE_TYPE (op))))
4235 {
4236 if (dump_enabled_p ())
4237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4238 "type conversion to/from bit-precision "
4239 "unsupported.\n");
4240 return false;
4241 }
4242
4243 if (!vec_stmt) /* transformation not required. */
4244 {
4245 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4246 if (dump_enabled_p ())
4247 dump_printf_loc (MSG_NOTE, vect_location,
4248 "=== vectorizable_assignment ===\n");
4249 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4250 return true;
4251 }
4252
4253 /** Transform. **/
4254 if (dump_enabled_p ())
4255 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4256
4257 /* Handle def. */
4258 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4259
4260 /* Handle use. */
4261 for (j = 0; j < ncopies; j++)
4262 {
4263 /* Handle uses. */
4264 if (j == 0)
4265 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4266 else
4267 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4268
4269 /* Arguments are ready. Create the new vector stmt. */
4270 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4271 {
4272 if (CONVERT_EXPR_CODE_P (code)
4273 || code == VIEW_CONVERT_EXPR)
4274 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4275 new_stmt = gimple_build_assign (vec_dest, vop);
4276 new_temp = make_ssa_name (vec_dest, new_stmt);
4277 gimple_assign_set_lhs (new_stmt, new_temp);
4278 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4279 if (slp_node)
4280 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4281 }
4282
4283 if (slp_node)
4284 continue;
4285
4286 if (j == 0)
4287 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4288 else
4289 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4290
4291 prev_stmt_info = vinfo_for_stmt (new_stmt);
4292 }
4293
4294 vec_oprnds.release ();
4295 return true;
4296 }
4297
4298
4299 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4300 either as shift by a scalar or by a vector. */
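/* A hedged usage sketch (hypothetical caller): a pattern-recognition
   routine can guard a shift-based rewrite with

     if (!vect_supportable_shift (RSHIFT_EXPR, itype))
       return NULL;

   before it commits to emitting the shift.  */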
4301
4302 bool
4303 vect_supportable_shift (enum tree_code code, tree scalar_type)
4304 {
4305
4306 machine_mode vec_mode;
4307 optab optab;
4308 int icode;
4309 tree vectype;
4310
4311 vectype = get_vectype_for_scalar_type (scalar_type);
4312 if (!vectype)
4313 return false;
4314
4315 optab = optab_for_tree_code (code, vectype, optab_scalar);
4316 if (!optab
4317 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4318 {
4319 optab = optab_for_tree_code (code, vectype, optab_vector);
4320 if (!optab
4321 || (optab_handler (optab, TYPE_MODE (vectype))
4322 == CODE_FOR_nothing))
4323 return false;
4324 }
4325
4326 vec_mode = TYPE_MODE (vectype);
4327 icode = (int) optab_handler (optab, vec_mode);
4328 if (icode == CODE_FOR_nothing)
4329 return false;
4330
4331 return true;
4332 }
4333
4334
4335 /* Function vectorizable_shift.
4336
4337 Check if STMT performs a shift operation that can be vectorized.
4338 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4339 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4340 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
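/* A hedged source-level example (illustrative only):

     void
     f (int *restrict a, int *restrict b, int s, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] << s;
     }

   is an LSHIFT_EXPR with a loop-invariant count; a per-element count
   such as b[i] << c[i] is also handled, via the vector/vector optabs
   checked below.  */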
4341
4342 static bool
4343 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4344 gimple *vec_stmt, slp_tree slp_node)
4345 {
4346 tree vec_dest;
4347 tree scalar_dest;
4348 tree op0, op1 = NULL;
4349 tree vec_oprnd1 = NULL_TREE;
4350 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4351 tree vectype;
4352 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4353 enum tree_code code;
4354 machine_mode vec_mode;
4355 tree new_temp;
4356 optab optab;
4357 int icode;
4358 machine_mode optab_op2_mode;
4359 tree def;
4360 gimple def_stmt;
4361 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4362 gimple new_stmt = NULL;
4363 stmt_vec_info prev_stmt_info;
4364 int nunits_in;
4365 int nunits_out;
4366 tree vectype_out;
4367 tree op1_vectype;
4368 int ncopies;
4369 int j, i;
4370 vec<tree> vec_oprnds0 = vNULL;
4371 vec<tree> vec_oprnds1 = vNULL;
4372 tree vop0, vop1;
4373 unsigned int k;
4374 bool scalar_shift_arg = true;
4375 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4376 int vf;
4377
4378 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4379 return false;
4380
4381 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4382 return false;
4383
4384 /* Is STMT a vectorizable binary/unary operation? */
4385 if (!is_gimple_assign (stmt))
4386 return false;
4387
4388 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4389 return false;
4390
4391 code = gimple_assign_rhs_code (stmt);
4392
4393 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4394 || code == RROTATE_EXPR))
4395 return false;
4396
4397 scalar_dest = gimple_assign_lhs (stmt);
4398 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4399 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4400 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4401 {
4402 if (dump_enabled_p ())
4403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4404 "bit-precision shifts not supported.\n");
4405 return false;
4406 }
4407
4408 op0 = gimple_assign_rhs1 (stmt);
4409 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4410 &def_stmt, &def, &dt[0], &vectype))
4411 {
4412 if (dump_enabled_p ())
4413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4414 "use not simple.\n");
4415 return false;
4416 }
4417 /* If op0 is an external or constant def, use a vector type with
4418 the same size as the output vector type. */
4419 if (!vectype)
4420 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4421 if (vec_stmt)
4422 gcc_assert (vectype);
4423 if (!vectype)
4424 {
4425 if (dump_enabled_p ())
4426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4427 "no vectype for scalar type\n");
4428 return false;
4429 }
4430
4431 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4432 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4433 if (nunits_out != nunits_in)
4434 return false;
4435
4436 op1 = gimple_assign_rhs2 (stmt);
4437 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4438 &def, &dt[1], &op1_vectype))
4439 {
4440 if (dump_enabled_p ())
4441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4442 "use not simple.\n");
4443 return false;
4444 }
4445
4446 if (loop_vinfo)
4447 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4448 else
4449 vf = 1;
4450
4451 /* Multiple types in SLP are handled by creating the appropriate number of
4452 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4453 case of SLP. */
4454 if (slp_node || PURE_SLP_STMT (stmt_info))
4455 ncopies = 1;
4456 else
4457 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4458
4459 gcc_assert (ncopies >= 1);
4460
4461 /* Determine whether the shift amount is a vector, or scalar. If the
4462 shift/rotate amount is a vector, use the vector/vector shift optabs. */
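/* Hedged illustration: a count defined inside the loop, as in
   a[i] = b[i] << c[i], is a vect_internal_def (or vect_induction_def)
   and forces the vector/vector optab, while a constant or
   loop-invariant count keeps SCALAR_SHIFT_ARG set.  */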
4463
4464 if ((dt[1] == vect_internal_def
4465 || dt[1] == vect_induction_def)
4466 && !slp_node)
4467 scalar_shift_arg = false;
4468 else if (dt[1] == vect_constant_def
4469 || dt[1] == vect_external_def
4470 || dt[1] == vect_internal_def)
4471 {
4472 /* In SLP, we need to check whether the shift count is the same for
4473 all statements; in loops, if it is a constant or invariant, it is
4474 always a scalar shift. */
4475 if (slp_node)
4476 {
4477 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4478 gimple slpstmt;
4479
4480 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4481 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4482 scalar_shift_arg = false;
4483 }
4484 }
4485 else
4486 {
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4489 "operand mode requires invariant argument.\n");
4490 return false;
4491 }
4492
4493 /* Vector shifted by vector. */
4494 if (!scalar_shift_arg)
4495 {
4496 optab = optab_for_tree_code (code, vectype, optab_vector);
4497 if (dump_enabled_p ())
4498 dump_printf_loc (MSG_NOTE, vect_location,
4499 "vector/vector shift/rotate found.\n");
4500
4501 if (!op1_vectype)
4502 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4503 if (op1_vectype == NULL_TREE
4504 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4505 {
4506 if (dump_enabled_p ())
4507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4508 "unusable type for last operand in"
4509 " vector/vector shift/rotate.\n");
4510 return false;
4511 }
4512 }
4513 /* See if the machine has a vector shifted by scalar insn and if not
4514 then see if it has a vector shifted by vector insn. */
4515 else
4516 {
4517 optab = optab_for_tree_code (code, vectype, optab_scalar);
4518 if (optab
4519 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4520 {
4521 if (dump_enabled_p ())
4522 dump_printf_loc (MSG_NOTE, vect_location,
4523 "vector/scalar shift/rotate found.\n");
4524 }
4525 else
4526 {
4527 optab = optab_for_tree_code (code, vectype, optab_vector);
4528 if (optab
4529 && (optab_handler (optab, TYPE_MODE (vectype))
4530 != CODE_FOR_nothing))
4531 {
4532 scalar_shift_arg = false;
4533
4534 if (dump_enabled_p ())
4535 dump_printf_loc (MSG_NOTE, vect_location,
4536 "vector/vector shift/rotate found.\n");
4537
4538 /* Unlike the other binary operators, shifts/rotates have
4539 an rhs of type int rather than of the same type as the lhs,
4540 so make sure the scalar has the right type if we are
4541 dealing with vectors of long long/long/short/char. */
4542 if (dt[1] == vect_constant_def)
4543 op1 = fold_convert (TREE_TYPE (vectype), op1);
4544 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4545 TREE_TYPE (op1)))
4546 {
4547 if (slp_node
4548 && TYPE_MODE (TREE_TYPE (vectype))
4549 != TYPE_MODE (TREE_TYPE (op1)))
4550 {
4551 if (dump_enabled_p ())
4552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4553 "unusable type for last operand in"
4554 " vector/vector shift/rotate.\n");
4555 return false;
4556 }
4557 if (vec_stmt && !slp_node)
4558 {
4559 op1 = fold_convert (TREE_TYPE (vectype), op1);
4560 op1 = vect_init_vector (stmt, op1,
4561 TREE_TYPE (vectype), NULL);
4562 }
4563 }
4564 }
4565 }
4566 }
4567
4568 /* Supportable by target? */
4569 if (!optab)
4570 {
4571 if (dump_enabled_p ())
4572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4573 "no optab.\n");
4574 return false;
4575 }
4576 vec_mode = TYPE_MODE (vectype);
4577 icode = (int) optab_handler (optab, vec_mode);
4578 if (icode == CODE_FOR_nothing)
4579 {
4580 if (dump_enabled_p ())
4581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4582 "op not supported by target.\n");
4583 /* Check only during analysis. */
4584 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4585 || (vf < vect_min_worthwhile_factor (code)
4586 && !vec_stmt))
4587 return false;
4588 if (dump_enabled_p ())
4589 dump_printf_loc (MSG_NOTE, vect_location,
4590 "proceeding using word mode.\n");
4591 }
4592
4593 /* Worthwhile without SIMD support? Check only during analysis. */
4594 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4595 && vf < vect_min_worthwhile_factor (code)
4596 && !vec_stmt)
4597 {
4598 if (dump_enabled_p ())
4599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4600 "not worthwhile without SIMD support.\n");
4601 return false;
4602 }
4603
4604 if (!vec_stmt) /* transformation not required. */
4605 {
4606 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4607 if (dump_enabled_p ())
4608 dump_printf_loc (MSG_NOTE, vect_location,
4609 "=== vectorizable_shift ===\n");
4610 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4611 return true;
4612 }
4613
4614 /** Transform. **/
4615
4616 if (dump_enabled_p ())
4617 dump_printf_loc (MSG_NOTE, vect_location,
4618 "transform binary/unary operation.\n");
4619
4620 /* Handle def. */
4621 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4622
4623 prev_stmt_info = NULL;
4624 for (j = 0; j < ncopies; j++)
4625 {
4626 /* Handle uses. */
4627 if (j == 0)
4628 {
4629 if (scalar_shift_arg)
4630 {
4631 /* Vector shl and shr insn patterns can be defined with scalar
4632 operand 2 (shift operand). In this case, use constant or loop
4633 invariant op1 directly, without extending it to vector mode
4634 first. */
4635 optab_op2_mode = insn_data[icode].operand[2].mode;
4636 if (!VECTOR_MODE_P (optab_op2_mode))
4637 {
4638 if (dump_enabled_p ())
4639 dump_printf_loc (MSG_NOTE, vect_location,
4640 "operand 1 using scalar mode.\n");
4641 vec_oprnd1 = op1;
4642 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4643 vec_oprnds1.quick_push (vec_oprnd1);
4644 if (slp_node)
4645 {
4646 /* Store vec_oprnd1 for every vector stmt to be created
4647 for SLP_NODE. We check during the analysis that all
4648 the shift arguments are the same.
4649 TODO: Allow different constants for different vector
4650 stmts generated for an SLP instance. */
4651 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4652 vec_oprnds1.quick_push (vec_oprnd1);
4653 }
4654 }
4655 }
4656
4657 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4658 (a special case for certain kinds of vector shifts); otherwise,
4659 operand 1 should be of a vector type (the usual case). */
4660 if (vec_oprnd1)
4661 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4662 slp_node, -1);
4663 else
4664 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4665 slp_node, -1);
4666 }
4667 else
4668 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4669
4670 /* Arguments are ready. Create the new vector stmt. */
4671 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4672 {
4673 vop1 = vec_oprnds1[i];
4674 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4675 new_temp = make_ssa_name (vec_dest, new_stmt);
4676 gimple_assign_set_lhs (new_stmt, new_temp);
4677 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4678 if (slp_node)
4679 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4680 }
4681
4682 if (slp_node)
4683 continue;
4684
4685 if (j == 0)
4686 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4687 else
4688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4689 prev_stmt_info = vinfo_for_stmt (new_stmt);
4690 }
4691
4692 vec_oprnds0.release ();
4693 vec_oprnds1.release ();
4694
4695 return true;
4696 }
4697
4698
4699 /* Function vectorizable_operation.
4700
4701 Check if STMT performs a binary, unary or ternary operation that can
4702 be vectorized.
4703 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4704 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4705 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
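/* A hedged source-level example (illustrative only):

     void
     f (int *restrict a, int *restrict b, int *restrict c, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] + c[i];
     }

   contains a PLUS_EXPR handled here; shift codes are rejected below
   and left to vectorizable_shift.  */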
4706
4707 static bool
4708 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4709 gimple *vec_stmt, slp_tree slp_node)
4710 {
4711 tree vec_dest;
4712 tree scalar_dest;
4713 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4714 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4715 tree vectype;
4716 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4717 enum tree_code code;
4718 machine_mode vec_mode;
4719 tree new_temp;
4720 int op_type;
4721 optab optab;
4722 int icode;
4723 tree def;
4724 gimple def_stmt;
4725 enum vect_def_type dt[3]
4726 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4727 gimple new_stmt = NULL;
4728 stmt_vec_info prev_stmt_info;
4729 int nunits_in;
4730 int nunits_out;
4731 tree vectype_out;
4732 int ncopies;
4733 int j, i;
4734 vec<tree> vec_oprnds0 = vNULL;
4735 vec<tree> vec_oprnds1 = vNULL;
4736 vec<tree> vec_oprnds2 = vNULL;
4737 tree vop0, vop1, vop2;
4738 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4739 int vf;
4740
4741 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4742 return false;
4743
4744 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4745 return false;
4746
4747 /* Is STMT a vectorizable binary/unary operation? */
4748 if (!is_gimple_assign (stmt))
4749 return false;
4750
4751 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4752 return false;
4753
4754 code = gimple_assign_rhs_code (stmt);
4755
4756 /* For pointer addition, we should use the normal plus for
4757 the vector addition. */
4758 if (code == POINTER_PLUS_EXPR)
4759 code = PLUS_EXPR;
4760
4761 /* Support only unary, binary or ternary operations. */
4762 op_type = TREE_CODE_LENGTH (code);
4763 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4764 {
4765 if (dump_enabled_p ())
4766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4767 "num. args = %d (not unary/binary/ternary op).\n",
4768 op_type);
4769 return false;
4770 }
4771
4772 scalar_dest = gimple_assign_lhs (stmt);
4773 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4774
4775 /* Most operations cannot handle bit-precision types without extra
4776 truncations. */
4777 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4778 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4779 /* Exceptions are bitwise binary operations. */
4780 && code != BIT_IOR_EXPR
4781 && code != BIT_XOR_EXPR
4782 && code != BIT_AND_EXPR)
4783 {
4784 if (dump_enabled_p ())
4785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4786 "bit-precision arithmetic not supported.\n");
4787 return false;
4788 }
4789
4790 op0 = gimple_assign_rhs1 (stmt);
4791 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4792 &def_stmt, &def, &dt[0], &vectype))
4793 {
4794 if (dump_enabled_p ())
4795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4796 "use not simple.\n");
4797 return false;
4798 }
4799 /* If op0 is an external or constant def, use a vector type with
4800 the same size as the output vector type. */
4801 if (!vectype)
4802 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4803 if (vec_stmt)
4804 gcc_assert (vectype);
4805 if (!vectype)
4806 {
4807 if (dump_enabled_p ())
4808 {
4809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4810 "no vectype for scalar type ");
4811 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4812 TREE_TYPE (op0));
4813 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4814 }
4815
4816 return false;
4817 }
4818
4819 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4820 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4821 if (nunits_out != nunits_in)
4822 return false;
4823
4824 if (op_type == binary_op || op_type == ternary_op)
4825 {
4826 op1 = gimple_assign_rhs2 (stmt);
4827 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4828 &def, &dt[1]))
4829 {
4830 if (dump_enabled_p ())
4831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4832 "use not simple.\n");
4833 return false;
4834 }
4835 }
4836 if (op_type == ternary_op)
4837 {
4838 op2 = gimple_assign_rhs3 (stmt);
4839 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4840 &def, &dt[2]))
4841 {
4842 if (dump_enabled_p ())
4843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4844 "use not simple.\n");
4845 return false;
4846 }
4847 }
4848
4849 if (loop_vinfo)
4850 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4851 else
4852 vf = 1;
4853
4854 /* Multiple types in SLP are handled by creating the appropriate number of
4855 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4856 case of SLP. */
4857 if (slp_node || PURE_SLP_STMT (stmt_info))
4858 ncopies = 1;
4859 else
4860 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4861
4862 gcc_assert (ncopies >= 1);
4863
4864 /* Shifts are handled in vectorizable_shift (). */
4865 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4866 || code == RROTATE_EXPR)
4867 return false;
4868
4869 /* Supportable by target? */
4870
4871 vec_mode = TYPE_MODE (vectype);
4872 if (code == MULT_HIGHPART_EXPR)
4873 {
4874 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4875 icode = LAST_INSN_CODE;
4876 else
4877 icode = CODE_FOR_nothing;
4878 }
4879 else
4880 {
4881 optab = optab_for_tree_code (code, vectype, optab_default);
4882 if (!optab)
4883 {
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4886 "no optab.\n");
4887 return false;
4888 }
4889 icode = (int) optab_handler (optab, vec_mode);
4890 }
4891
4892 if (icode == CODE_FOR_nothing)
4893 {
4894 if (dump_enabled_p ())
4895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4896 "op not supported by target.\n");
4897 /* Check only during analysis. */
4898 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4899 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4900 return false;
4901 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_NOTE, vect_location,
4903 "proceeding using word mode.\n");
4904 }
4905
4906 /* Worthwhile without SIMD support? Check only during analysis. */
4907 if (!VECTOR_MODE_P (vec_mode)
4908 && !vec_stmt
4909 && vf < vect_min_worthwhile_factor (code))
4910 {
4911 if (dump_enabled_p ())
4912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4913 "not worthwhile without SIMD support.\n");
4914 return false;
4915 }
4916
4917 if (!vec_stmt) /* transformation not required. */
4918 {
4919 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_NOTE, vect_location,
4922 "=== vectorizable_operation ===\n");
4923 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4924 return true;
4925 }
4926
4927 /** Transform. **/
4928
4929 if (dump_enabled_p ())
4930 dump_printf_loc (MSG_NOTE, vect_location,
4931 "transform binary/unary operation.\n");
4932
4933 /* Handle def. */
4934 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4935
4936 /* In case the vectorization factor (VF) is bigger than the number
4937 of elements that we can fit in a vectype (nunits), we have to generate
4938 more than one vector stmt - i.e., we need to "unroll" the
4939 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4940 from one copy of the vector stmt to the next, in the field
4941 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4942 stages to find the correct vector defs to be used when vectorizing
4943 stmts that use the defs of the current stmt. The example below
4944 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4945 we need to create 4 vectorized stmts):
4946
4947 before vectorization:
4948 RELATED_STMT VEC_STMT
4949 S1: x = memref - -
4950 S2: z = x + 1 - -
4951
4952 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4953 there):
4954 RELATED_STMT VEC_STMT
4955 VS1_0: vx0 = memref0 VS1_1 -
4956 VS1_1: vx1 = memref1 VS1_2 -
4957 VS1_2: vx2 = memref2 VS1_3 -
4958 VS1_3: vx3 = memref3 - -
4959 S1: x = load - VS1_0
4960 S2: z = x + 1 - -
4961
4962 step2: vectorize stmt S2 (done here):
4963 To vectorize stmt S2 we first need to find the relevant vector
4964 def for the first operand 'x'. This is, as usual, obtained from
4965 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4966 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4967 relevant vector def 'vx0'. Having found 'vx0' we can generate
4968 the vector stmt VS2_0, and as usual, record it in the
4969 STMT_VINFO_VEC_STMT of stmt S2.
4970 When creating the second copy (VS2_1), we obtain the relevant vector
4971 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4972 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4973 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4974 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4975 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4976 chain of stmts and pointers:
4977 RELATED_STMT VEC_STMT
4978 VS1_0: vx0 = memref0 VS1_1 -
4979 VS1_1: vx1 = memref1 VS1_2 -
4980 VS1_2: vx2 = memref2 VS1_3 -
4981 VS1_3: vx3 = memref3 - -
4982 S1: x = load - VS1_0
4983 VS2_0: vz0 = vx0 + v1 VS2_1 -
4984 VS2_1: vz1 = vx1 + v1 VS2_2 -
4985 VS2_2: vz2 = vx2 + v1 VS2_3 -
4986 VS2_3: vz3 = vx3 + v1 - -
4987 S2: z = x + 1 - VS2_0 */
4988
4989 prev_stmt_info = NULL;
4990 for (j = 0; j < ncopies; j++)
4991 {
4992 /* Handle uses. */
4993 if (j == 0)
4994 {
4995 if (op_type == binary_op || op_type == ternary_op)
4996 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4997 slp_node, -1);
4998 else
4999 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5000 slp_node, -1);
5001 if (op_type == ternary_op)
5002 {
5003 vec_oprnds2.create (1);
5004 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
5005 stmt,
5006 NULL));
5007 }
5008 }
5009 else
5010 {
5011 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5012 if (op_type == ternary_op)
5013 {
5014 tree vec_oprnd = vec_oprnds2.pop ();
5015 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5016 vec_oprnd));
5017 }
5018 }
5019
5020 /* Arguments are ready. Create the new vector stmt. */
5021 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5022 {
5023 vop1 = ((op_type == binary_op || op_type == ternary_op)
5024 ? vec_oprnds1[i] : NULL_TREE);
5025 vop2 = ((op_type == ternary_op)
5026 ? vec_oprnds2[i] : NULL_TREE);
5027 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5028 new_temp = make_ssa_name (vec_dest, new_stmt);
5029 gimple_assign_set_lhs (new_stmt, new_temp);
5030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5031 if (slp_node)
5032 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5033 }
5034
5035 if (slp_node)
5036 continue;
5037
5038 if (j == 0)
5039 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5040 else
5041 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5042 prev_stmt_info = vinfo_for_stmt (new_stmt);
5043 }
5044
5045 vec_oprnds0.release ();
5046 vec_oprnds1.release ();
5047 vec_oprnds2.release ();
5048
5049 return true;
5050 }
5051
5052 /* A helper function to ensure data reference DR's base alignment
5053 for STMT_INFO. */
5054
5055 static void
5056 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5057 {
5058 if (!dr->aux)
5059 return;
5060
5061 if (DR_VECT_AUX (dr)->base_misaligned)
5062 {
5063 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5064 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5065
5066 if (decl_in_symtab_p (base_decl))
5067 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5068 else
5069 {
5070 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5071 DECL_USER_ALIGN (base_decl) = 1;
5072 }
5073 DR_VECT_AUX (dr)->base_misaligned = false;
5074 }
5075 }
5076
5077
5078 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
5079 reversal of the vector elements. If that is impossible to do,
5080 return NULL_TREE. */
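/* A worked example derived from the code below: for V4SI the selector
   is { 3, 2, 1, 0 }, i.e. the permutation that reverses the four
   lanes, provided the target can permute in that mode.  */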
5081
5082 static tree
5083 perm_mask_for_reverse (tree vectype)
5084 {
5085 int i, nunits;
5086 unsigned char *sel;
5087
5088 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5089 sel = XALLOCAVEC (unsigned char, nunits);
5090
5091 for (i = 0; i < nunits; ++i)
5092 sel[i] = nunits - 1 - i;
5093
5094 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5095 return NULL_TREE;
5096 return vect_gen_perm_mask_checked (vectype, sel);
5097 }
5098
5099 /* Function vectorizable_store.
5100
5101 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5102 can be vectorized.
5103 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5104 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5105 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
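/* A hedged source-level example (illustrative only): the store in

     void
     f (int *restrict a, int x, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = x;
     }

   is handled here, with the loop-invariant rhs X broadcast into a
   vector (dt == vect_external_def).  */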
5106
5107 static bool
5108 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5109 slp_tree slp_node)
5110 {
5111 tree scalar_dest;
5112 tree data_ref;
5113 tree op;
5114 tree vec_oprnd = NULL_TREE;
5115 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5116 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5117 tree elem_type;
5118 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5119 struct loop *loop = NULL;
5120 machine_mode vec_mode;
5121 tree dummy;
5122 enum dr_alignment_support alignment_support_scheme;
5123 tree def;
5124 gimple def_stmt;
5125 enum vect_def_type dt;
5126 stmt_vec_info prev_stmt_info = NULL;
5127 tree dataref_ptr = NULL_TREE;
5128 tree dataref_offset = NULL_TREE;
5129 gimple ptr_incr = NULL;
5130 int ncopies;
5131 int j;
5132 gimple next_stmt, first_stmt = NULL;
5133 bool grouped_store = false;
5134 bool store_lanes_p = false;
5135 unsigned int group_size, i;
5136 vec<tree> dr_chain = vNULL;
5137 vec<tree> oprnds = vNULL;
5138 vec<tree> result_chain = vNULL;
5139 bool inv_p;
5140 bool negative = false;
5141 tree offset = NULL_TREE;
5142 vec<tree> vec_oprnds = vNULL;
5143 bool slp = (slp_node != NULL);
5144 unsigned int vec_num;
5145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5146 tree aggr_type;
5147
5148 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5149 return false;
5150
5151 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5152 return false;
5153
5154 /* Is vectorizable store? */
5155
5156 if (!is_gimple_assign (stmt))
5157 return false;
5158
5159 scalar_dest = gimple_assign_lhs (stmt);
5160 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5161 && is_pattern_stmt_p (stmt_info))
5162 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5163 if (TREE_CODE (scalar_dest) != ARRAY_REF
5164 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5165 && TREE_CODE (scalar_dest) != INDIRECT_REF
5166 && TREE_CODE (scalar_dest) != COMPONENT_REF
5167 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5168 && TREE_CODE (scalar_dest) != REALPART_EXPR
5169 && TREE_CODE (scalar_dest) != MEM_REF)
5170 return false;
5171
5172 gcc_assert (gimple_assign_single_p (stmt));
5173
5174 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5175 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5176
5177 if (loop_vinfo)
5178 loop = LOOP_VINFO_LOOP (loop_vinfo);
5179
5180 /* Multiple types in SLP are handled by creating the appropriate number of
5181 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5182 case of SLP. */
5183 if (slp || PURE_SLP_STMT (stmt_info))
5184 ncopies = 1;
5185 else
5186 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5187
5188 gcc_assert (ncopies >= 1);
5189
5190 /* FORNOW. This restriction should be relaxed. */
5191 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5192 {
5193 if (dump_enabled_p ())
5194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5195 "multiple types in nested loop.\n");
5196 return false;
5197 }
5198
5199 op = gimple_assign_rhs1 (stmt);
5200 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5201 &def, &dt))
5202 {
5203 if (dump_enabled_p ())
5204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5205 "use not simple.\n");
5206 return false;
5207 }
5208
5209 elem_type = TREE_TYPE (vectype);
5210 vec_mode = TYPE_MODE (vectype);
5211
5212 /* FORNOW. In some cases we can vectorize even if the data-type is not
5213 supported (e.g. array initialization with 0). */
5214 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5215 return false;
5216
5217 if (!STMT_VINFO_DATA_REF (stmt_info))
5218 return false;
5219
5220 if (!STMT_VINFO_STRIDED_P (stmt_info))
5221 {
5222 negative =
5223 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5224 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5225 size_zero_node) < 0;
5226 if (negative && ncopies > 1)
5227 {
5228 if (dump_enabled_p ())
5229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5230 "multiple types with negative step.\n");
5231 return false;
5232 }
5233 if (negative)
5234 {
5235 gcc_assert (!grouped_store);
5236 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5237 if (alignment_support_scheme != dr_aligned
5238 && alignment_support_scheme != dr_unaligned_supported)
5239 {
5240 if (dump_enabled_p ())
5241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5242 "negative step but alignment required.\n");
5243 return false;
5244 }
5245 if (dt != vect_constant_def
5246 && dt != vect_external_def
5247 && !perm_mask_for_reverse (vectype))
5248 {
5249 if (dump_enabled_p ())
5250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5251 "negative step and reversing not supported.\n");
5252 return false;
5253 }
5254 }
5255 }
5256
5257 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5258 {
5259 grouped_store = true;
5260 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5261 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5262 if (!slp
5263 && !PURE_SLP_STMT (stmt_info)
5264 && !STMT_VINFO_STRIDED_P (stmt_info))
5265 {
5266 if (vect_store_lanes_supported (vectype, group_size))
5267 store_lanes_p = true;
5268 else if (!vect_grouped_store_supported (vectype, group_size))
5269 return false;
5270 }
5271
5272 if (STMT_VINFO_STRIDED_P (stmt_info)
5273 && (slp || PURE_SLP_STMT (stmt_info))
5274 && (group_size > nunits
5275 || nunits % group_size != 0))
5276 {
5277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5278 "unhandled strided group store\n");
5279 return false;
5280 }
5281
5282 if (first_stmt == stmt)
5283 {
5284 /* STMT is the leader of the group. Check the operands of all the
5285 stmts of the group. */
5286 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5287 while (next_stmt)
5288 {
5289 gcc_assert (gimple_assign_single_p (next_stmt));
5290 op = gimple_assign_rhs1 (next_stmt);
5291 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5292 &def_stmt, &def, &dt))
5293 {
5294 if (dump_enabled_p ())
5295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5296 "use not simple.\n");
5297 return false;
5298 }
5299 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5300 }
5301 }
5302 }
5303
5304 if (!vec_stmt) /* transformation not required. */
5305 {
5306 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5307 /* The SLP costs are calculated during SLP analysis. */
5308 if (!PURE_SLP_STMT (stmt_info))
5309 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5310 NULL, NULL, NULL);
5311 return true;
5312 }
5313
5314 /** Transform. **/
5315
5316 ensure_base_align (stmt_info, dr);
5317
5318 if (grouped_store)
5319 {
5320 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5321 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5322
5323 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5324
5325 /* FORNOW */
5326 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5327
5328 /* We vectorize all the stmts of the interleaving group when we
5329 reach the last stmt in the group. */
5330 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5331 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5332 && !slp)
5333 {
5334 *vec_stmt = NULL;
5335 return true;
5336 }
5337
5338 if (slp)
5339 {
5340 grouped_store = false;
5341 /* VEC_NUM is the number of vect stmts to be created for this
5342 group. */
5343 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5344 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5345 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5346 op = gimple_assign_rhs1 (first_stmt);
5347 }
5348 else
5349 /* VEC_NUM is the number of vect stmts to be created for this
5350 group. */
5351 vec_num = group_size;
5352 }
5353 else
5354 {
5355 first_stmt = stmt;
5356 first_dr = dr;
5357 group_size = vec_num = 1;
5358 }
5359
5360 if (dump_enabled_p ())
5361 dump_printf_loc (MSG_NOTE, vect_location,
5362 "transform store. ncopies = %d\n", ncopies);
5363
5364 if (STMT_VINFO_STRIDED_P (stmt_info))
5365 {
5366 gimple_stmt_iterator incr_gsi;
5367 bool insert_after;
5368 gimple incr;
5369 tree offvar;
5370 tree ivstep;
5371 tree running_off;
5372 gimple_seq stmts = NULL;
5373 tree stride_base, stride_step, alias_off;
5374 tree vec_oprnd;
5375 unsigned int g;
5376
5377 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5378
5379 stride_base
5380 = fold_build_pointer_plus
5381 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5382 size_binop (PLUS_EXPR,
5383 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5384 convert_to_ptrofftype (DR_INIT (first_dr))));
5385 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5386
5387 /* For a store with loop-invariant (but other than power-of-2)
5388 stride (i.e. not a grouped access) like so:
5389
5390 for (i = 0; i < n; i += stride)
5391 array[i] = ...;
5392
5393 we generate a new induction variable and new stores from
5394 the components of the (vectorized) rhs:
5395
5396 for (j = 0; ; j += VF*stride)
5397 vectemp = ...;
5398 tmp1 = vectemp[0];
5399 array[j] = tmp1;
5400 tmp2 = vectemp[1];
5401 array[j + stride] = tmp2;
5402 ...
5403 */
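/* A hedged worked example for the non-SLP case: with V4SI
   (nunits == 4) NSTORES below is 4, so every vector of the rhs is
   decomposed into four BIT_FIELD_REF extracts, each followed by a
   scalar store and a pointer bump by STRIDE_STEP.  */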
5404
5405 unsigned nstores = nunits;
5406 tree ltype = elem_type;
5407 if (slp)
5408 {
5409 nstores = nunits / group_size;
5410 if (group_size < nunits)
5411 ltype = build_vector_type (elem_type, group_size);
5412 else
5413 ltype = vectype;
5414 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5415 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5416 group_size = 1;
5417 }
5418
5419 ivstep = stride_step;
5420 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5421 build_int_cst (TREE_TYPE (ivstep),
5422 ncopies * nstores));
5423
5424 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5425
5426 create_iv (stride_base, ivstep, NULL,
5427 loop, &incr_gsi, insert_after,
5428 &offvar, NULL);
5429 incr = gsi_stmt (incr_gsi);
5430 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5431
5432 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5433 if (stmts)
5434 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5435
5436 prev_stmt_info = NULL;
5437 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5438 next_stmt = first_stmt;
5439 for (g = 0; g < group_size; g++)
5440 {
5441 running_off = offvar;
5442 if (g)
5443 {
5444 tree size = TYPE_SIZE_UNIT (ltype);
5445 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5446 size);
5447 tree newoff = copy_ssa_name (running_off, NULL);
5448 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5449 running_off, pos);
5450 vect_finish_stmt_generation (stmt, incr, gsi);
5451 running_off = newoff;
5452 }
5453 for (j = 0; j < ncopies; j++)
5454 {
5455 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5456 and first_stmt == stmt. */
5457 if (j == 0)
5458 {
5459 if (slp)
5460 {
5461 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5462 slp_node, -1);
5463 vec_oprnd = vec_oprnds[0];
5464 }
5465 else
5466 {
5467 gcc_assert (gimple_assign_single_p (next_stmt));
5468 op = gimple_assign_rhs1 (next_stmt);
5469 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5470 NULL);
5471 }
5472 }
5473 else
5474 {
5475 if (slp)
5476 vec_oprnd = vec_oprnds[j];
5477 else
5478 {
5479 vect_is_simple_use (vec_oprnd, NULL, loop_vinfo,
5480 bb_vinfo, &def_stmt, &def, &dt);
5481 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5482 }
5483 }
5484
5485 for (i = 0; i < nstores; i++)
5486 {
5487 tree newref, newoff;
5488 gimple incr, assign;
5489 tree size = TYPE_SIZE (ltype);
5490 /* Extract the i'th component. */
5491 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5492 bitsize_int (i), size);
5493 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5494 size, pos);
5495
5496 elem = force_gimple_operand_gsi (gsi, elem, true,
5497 NULL_TREE, true,
5498 GSI_SAME_STMT);
5499
5500 newref = build2 (MEM_REF, ltype,
5501 running_off, alias_off);
5502
5503 /* And store it to *running_off. */
5504 assign = gimple_build_assign (newref, elem);
5505 vect_finish_stmt_generation (stmt, assign, gsi);
5506
5507 newoff = copy_ssa_name (running_off, NULL);
5508 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5509 running_off, stride_step);
5510 vect_finish_stmt_generation (stmt, incr, gsi);
5511
5512 running_off = newoff;
5513 if (g == group_size - 1
5514 && !slp)
5515 {
5516 if (j == 0 && i == 0)
5517 STMT_VINFO_VEC_STMT (stmt_info)
5518 = *vec_stmt = assign;
5519 else
5520 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5521 prev_stmt_info = vinfo_for_stmt (assign);
5522 }
5523 }
5524 }
5525 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5526 }
5527 return true;
5528 }
5529
5530 dr_chain.create (group_size);
5531 oprnds.create (group_size);
5532
5533 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5534 gcc_assert (alignment_support_scheme);
5535 /* Targets with store-lane instructions must not require explicit
5536 realignment. */
5537 gcc_assert (!store_lanes_p
5538 || alignment_support_scheme == dr_aligned
5539 || alignment_support_scheme == dr_unaligned_supported);
5540
5541 if (negative)
5542 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5543
5544 if (store_lanes_p)
5545 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5546 else
5547 aggr_type = vectype;
5548
5549 /* In case the vectorization factor (VF) is bigger than the number
5550 of elements that we can fit in a vectype (nunits), we have to generate
5551 more than one vector stmt - i.e., we need to "unroll" the
5552 vector stmt by a factor VF/nunits. For more details see the
5553 documentation of vect_get_vec_def_for_stmt_copy. */
5554
5555 /* In case of interleaving (non-unit grouped access):
5556
5557 S1: &base + 2 = x2
5558 S2: &base = x0
5559 S3: &base + 1 = x1
5560 S4: &base + 3 = x3
5561
5562 We create vectorized stores starting from the base address (the access of
5563 the first stmt in the chain, S2 in the above example) when the last store
5564 stmt of the chain (S4) is reached:
5565
5566 VS1: &base = vx2
5567 VS2: &base + vec_size*1 = vx0
5568 VS3: &base + vec_size*2 = vx1
5569 VS4: &base + vec_size*3 = vx3
5570
5571 Then permutation statements are generated:
5572
5573 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5574 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5575 ...
5576
5577 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5578 (the order of the data-refs in the output of vect_permute_store_chain
5579 corresponds to the order of scalar stmts in the interleaving chain - see
5580 the documentation of vect_permute_store_chain()).
5581
5582 In case of both multiple types and interleaving, the above vector stores and
5583 permutation stmts are created for every copy. The result vector stmts are
5584 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5585 STMT_VINFO_RELATED_STMT for the next copies.
5586 */
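/* For illustration (assuming two hypothetical V4SI vectors vx0 and vx1
   forming a group of size 2), the permutation statements generated by
   vect_permute_store_chain would be analogous to:

     vy0 = VEC_PERM_EXPR < vx0, vx1, { 0, 4, 1, 5 } >
     vy1 = VEC_PERM_EXPR < vx0, vx1, { 2, 6, 3, 7 } >

   so that vy0 and vy1 hold the elements in the interleaved memory order
   of the group.  */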
5587
5588 prev_stmt_info = NULL;
5589 for (j = 0; j < ncopies; j++)
5590 {
5591 gimple new_stmt;
5592
5593 if (j == 0)
5594 {
5595 if (slp)
5596 {
5597 /* Get vectorized arguments for SLP_NODE. */
5598 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5599 NULL, slp_node, -1);
5600
5601 vec_oprnd = vec_oprnds[0];
5602 }
5603 else
5604 {
5605 /* For interleaved stores we collect vectorized defs for all the
5606 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5607 used as an input to vect_permute_store_chain(), and OPRNDS as
5608 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5609
5610 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5611 OPRNDS are of size 1. */
5612 next_stmt = first_stmt;
5613 for (i = 0; i < group_size; i++)
5614 {
5615 /* Since gaps are not supported for interleaved stores,
5616 GROUP_SIZE is the exact number of stmts in the chain.
5617 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5618 there is no interleaving, GROUP_SIZE is 1, and only one
5619 iteration of the loop will be executed. */
5620 gcc_assert (next_stmt
5621 && gimple_assign_single_p (next_stmt));
5622 op = gimple_assign_rhs1 (next_stmt);
5623
5624 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5625 NULL);
5626 dr_chain.quick_push (vec_oprnd);
5627 oprnds.quick_push (vec_oprnd);
5628 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5629 }
5630 }
5631
5632 /* We should have caught mismatched types earlier. */
5633 gcc_assert (useless_type_conversion_p (vectype,
5634 TREE_TYPE (vec_oprnd)));
5635 bool simd_lane_access_p
5636 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5637 if (simd_lane_access_p
5638 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5639 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5640 && integer_zerop (DR_OFFSET (first_dr))
5641 && integer_zerop (DR_INIT (first_dr))
5642 && alias_sets_conflict_p (get_alias_set (aggr_type),
5643 get_alias_set (DR_REF (first_dr))))
5644 {
5645 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5646 dataref_offset = build_int_cst (reference_alias_ptr_type
5647 (DR_REF (first_dr)), 0);
5648 inv_p = false;
5649 }
5650 else
5651 dataref_ptr
5652 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5653 simd_lane_access_p ? loop : NULL,
5654 offset, &dummy, gsi, &ptr_incr,
5655 simd_lane_access_p, &inv_p);
5656 gcc_assert (bb_vinfo || !inv_p);
5657 }
5658 else
5659 {
5660 /* For interleaved stores we created vectorized defs for all the
5661 defs stored in OPRNDS in the previous iteration (previous copy).
5662 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5663 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5664 next copy.
5665 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5666 OPRNDS are of size 1. */
5667 for (i = 0; i < group_size; i++)
5668 {
5669 op = oprnds[i];
5670 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5671 &def, &dt);
5672 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5673 dr_chain[i] = vec_oprnd;
5674 oprnds[i] = vec_oprnd;
5675 }
5676 if (dataref_offset)
5677 dataref_offset
5678 = int_const_binop (PLUS_EXPR, dataref_offset,
5679 TYPE_SIZE_UNIT (aggr_type));
5680 else
5681 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5682 TYPE_SIZE_UNIT (aggr_type));
5683 }
5684
5685 if (store_lanes_p)
5686 {
5687 tree vec_array;
5688
5689 /* Combine all the vectors into an array. */
5690 vec_array = create_vector_array (vectype, vec_num);
5691 for (i = 0; i < vec_num; i++)
5692 {
5693 vec_oprnd = dr_chain[i];
5694 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5695 }
5696
5697 /* Emit:
5698 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5699 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5700 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5701 gimple_call_set_lhs (new_stmt, data_ref);
5702 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5703 }
5704 else
5705 {
5706 new_stmt = NULL;
5707 if (grouped_store)
5708 {
5709 if (j == 0)
5710 result_chain.create (group_size);
5711 /* Permute. */
5712 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5713 &result_chain);
5714 }
5715
5716 next_stmt = first_stmt;
5717 for (i = 0; i < vec_num; i++)
5718 {
5719 unsigned align, misalign;
5720
5721 if (i > 0)
5722 /* Bump the vector pointer. */
5723 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5724 stmt, NULL_TREE);
5725
5726 if (slp)
5727 vec_oprnd = vec_oprnds[i];
5728 else if (grouped_store)
5729 /* For grouped stores vectorized defs are interleaved in
5730 vect_permute_store_chain(). */
5731 vec_oprnd = result_chain[i];
5732
5733 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5734 dataref_ptr,
5735 dataref_offset
5736 ? dataref_offset
5737 : build_int_cst (reference_alias_ptr_type
5738 (DR_REF (first_dr)), 0));
5739 align = TYPE_ALIGN_UNIT (vectype);
5740 if (aligned_access_p (first_dr))
5741 misalign = 0;
5742 else if (DR_MISALIGNMENT (first_dr) == -1)
5743 {
5744 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5745 align = TYPE_ALIGN_UNIT (elem_type);
5746 else
5747 align = get_object_alignment (DR_REF (first_dr))
5748 / BITS_PER_UNIT;
5749 misalign = 0;
5750 TREE_TYPE (data_ref)
5751 = build_aligned_type (TREE_TYPE (data_ref),
5752 align * BITS_PER_UNIT);
5753 }
5754 else
5755 {
5756 TREE_TYPE (data_ref)
5757 = build_aligned_type (TREE_TYPE (data_ref),
5758 TYPE_ALIGN (elem_type));
5759 misalign = DR_MISALIGNMENT (first_dr);
5760 }
5761 if (dataref_offset == NULL_TREE
5762 && TREE_CODE (dataref_ptr) == SSA_NAME)
5763 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5764 misalign);
5765
5766 if (negative
5767 && dt != vect_constant_def
5768 && dt != vect_external_def)
5769 {
5770 tree perm_mask = perm_mask_for_reverse (vectype);
5771 tree perm_dest
5772 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5773 vectype);
5774 tree new_temp = make_ssa_name (perm_dest);
5775
5776 /* Generate the permute statement. */
5777 gimple perm_stmt
5778 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5779 vec_oprnd, perm_mask);
5780 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5781
5782 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5783 vec_oprnd = new_temp;
5784 }
5785
5786 /* Arguments are ready. Create the new vector stmt. */
5787 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5788 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5789
5790 if (slp)
5791 continue;
5792
5793 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5794 if (!next_stmt)
5795 break;
5796 }
5797 }
5798 if (!slp)
5799 {
5800 if (j == 0)
5801 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5802 else
5803 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5804 prev_stmt_info = vinfo_for_stmt (new_stmt);
5805 }
5806 }
5807
5808 dr_chain.release ();
5809 oprnds.release ();
5810 result_chain.release ();
5811 vec_oprnds.release ();
5812
5813 return true;
5814 }
5815
5816 /* Given a vector type VECTYPE, turn permutation SEL into the equivalent
5817 VECTOR_CST mask. No checks are made that the target platform supports the
5818 mask, so callers may wish to test can_vec_perm_p separately, or use
5819 vect_gen_perm_mask_checked. */
5820
5821 tree
5822 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5823 {
5824 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5825 int i, nunits;
5826
5827 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5828
5829 mask_elt_type = lang_hooks.types.type_for_mode
5830 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5831 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5832
5833 mask_elts = XALLOCAVEC (tree, nunits);
5834 for (i = nunits - 1; i >= 0; i--)
5835 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5836 mask_vec = build_vector (mask_type, mask_elts);
5837
5838 return mask_vec;
5839 }
5840
5841 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5842 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5843
5844 tree
5845 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5846 {
5847 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5848 return vect_gen_perm_mask_any (vectype, sel);
5849 }
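/* Usage sketch for the two functions above (illustrative): to reverse a
   four-element vector a caller would pass SEL = { 3, 2, 1, 0 }, obtaining
   the integer VECTOR_CST { 3, 2, 1, 0 } suitable as the third operand of a
   VEC_PERM_EXPR; the checked variant additionally asserts that the target
   can perform that permutation.  */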
5850
5851 /* Given vector variables X and Y that were generated for the scalar
5852 STMT, generate instructions to permute the vector elements of X and Y
5853 using the permutation mask MASK_VEC, insert them at *GSI and return the
5854 permuted vector variable. */
5855
5856 static tree
5857 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5858 gimple_stmt_iterator *gsi)
5859 {
5860 tree vectype = TREE_TYPE (x);
5861 tree perm_dest, data_ref;
5862 gimple perm_stmt;
5863
5864 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5865 data_ref = make_ssa_name (perm_dest);
5866
5867 /* Generate the permute statement. */
5868 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5869 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5870
5871 return data_ref;
5872 }
5873
5874 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5875 inserting them on the loop's preheader edge. Returns true if we
5876 were successful in doing so (and thus STMT can then be moved),
5877 otherwise returns false. */
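/* For example (an illustrative, hypothetical case): for an invariant load
   x_1 = *p_2 where p_2 is defined inside LOOP as p_2 = &a + 4 and that
   definition only uses values defined outside LOOP, the definition of p_2
   is moved to the preheader edge so that the caller can then hoist the
   load itself.  */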
5878
5879 static bool
5880 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5881 {
5882 ssa_op_iter i;
5883 tree op;
5884 bool any = false;
5885
5886 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5887 {
5888 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5889 if (!gimple_nop_p (def_stmt)
5890 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5891 {
5892 /* Make sure we don't need to recurse. While we could do
5893 so in simple cases, for more complex use webs we don't
5894 have an easy way to preserve stmt order so as to fulfil
5895 dependencies within them. */
5896 tree op2;
5897 ssa_op_iter i2;
5898 if (gimple_code (def_stmt) == GIMPLE_PHI)
5899 return false;
5900 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5901 {
5902 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5903 if (!gimple_nop_p (def_stmt2)
5904 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5905 return false;
5906 }
5907 any = true;
5908 }
5909 }
5910
5911 if (!any)
5912 return true;
5913
5914 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5915 {
5916 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5917 if (!gimple_nop_p (def_stmt)
5918 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5919 {
5920 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5921 gsi_remove (&gsi, false);
5922 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5923 }
5924 }
5925
5926 return true;
5927 }
5928
5929 /* vectorizable_load.
5930
5931 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5932 can be vectorized.
5933 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5934 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5935 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5936
5937 static bool
5938 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5939 slp_tree slp_node, slp_instance slp_node_instance)
5940 {
5941 tree scalar_dest;
5942 tree vec_dest = NULL;
5943 tree data_ref = NULL;
5944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5945 stmt_vec_info prev_stmt_info;
5946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5947 struct loop *loop = NULL;
5948 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5949 bool nested_in_vect_loop = false;
5950 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5951 tree elem_type;
5952 tree new_temp;
5953 machine_mode mode;
5954 gimple new_stmt = NULL;
5955 tree dummy;
5956 enum dr_alignment_support alignment_support_scheme;
5957 tree dataref_ptr = NULL_TREE;
5958 tree dataref_offset = NULL_TREE;
5959 gimple ptr_incr = NULL;
5960 int ncopies;
5961 int i, j, group_size = -1, group_gap_adj;
5962 tree msq = NULL_TREE, lsq;
5963 tree offset = NULL_TREE;
5964 tree byte_offset = NULL_TREE;
5965 tree realignment_token = NULL_TREE;
5966 gphi *phi = NULL;
5967 vec<tree> dr_chain = vNULL;
5968 bool grouped_load = false;
5969 bool load_lanes_p = false;
5970 gimple first_stmt;
5971 bool inv_p;
5972 bool negative = false;
5973 bool compute_in_loop = false;
5974 struct loop *at_loop;
5975 int vec_num;
5976 bool slp = (slp_node != NULL);
5977 bool slp_perm = false;
5978 enum tree_code code;
5979 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5980 int vf;
5981 tree aggr_type;
5982 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5983 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5984 int gather_scale = 1;
5985 enum vect_def_type gather_dt = vect_unknown_def_type;
5986
5987 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5988 return false;
5989
5990 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5991 return false;
5992
5993 /* Is vectorizable load? */
5994 if (!is_gimple_assign (stmt))
5995 return false;
5996
5997 scalar_dest = gimple_assign_lhs (stmt);
5998 if (TREE_CODE (scalar_dest) != SSA_NAME)
5999 return false;
6000
6001 code = gimple_assign_rhs_code (stmt);
6002 if (code != ARRAY_REF
6003 && code != BIT_FIELD_REF
6004 && code != INDIRECT_REF
6005 && code != COMPONENT_REF
6006 && code != IMAGPART_EXPR
6007 && code != REALPART_EXPR
6008 && code != MEM_REF
6009 && TREE_CODE_CLASS (code) != tcc_declaration)
6010 return false;
6011
6012 if (!STMT_VINFO_DATA_REF (stmt_info))
6013 return false;
6014
6015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6016 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6017
6018 if (loop_vinfo)
6019 {
6020 loop = LOOP_VINFO_LOOP (loop_vinfo);
6021 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6022 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6023 }
6024 else
6025 vf = 1;
6026
6027 /* Multiple types in SLP are handled by creating the appropriate number of
6028 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6029 case of SLP. */
6030 if (slp || PURE_SLP_STMT (stmt_info))
6031 ncopies = 1;
6032 else
6033 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6034
6035 gcc_assert (ncopies >= 1);
6036
6037 /* FORNOW. This restriction should be relaxed. */
6038 if (nested_in_vect_loop && ncopies > 1)
6039 {
6040 if (dump_enabled_p ())
6041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6042 "multiple types in nested loop.\n");
6043 return false;
6044 }
6045
6046 /* Invalidate assumptions made by dependence analysis when vectorization
6047 on the unrolled body effectively re-orders stmts. */
6048 if (ncopies > 1
6049 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6050 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6051 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6052 {
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6055 "cannot perform implicit CSE when unrolling "
6056 "with negative dependence distance\n");
6057 return false;
6058 }
6059
6060 elem_type = TREE_TYPE (vectype);
6061 mode = TYPE_MODE (vectype);
6062
6063 /* FORNOW. In some cases we can vectorize even if the data-type is not
6064 supported (e.g. data copies). */
6065 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6066 {
6067 if (dump_enabled_p ())
6068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6069 "Aligned load, but unsupported type.\n");
6070 return false;
6071 }
6072
6073 /* Check if the load is a part of an interleaving chain. */
6074 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6075 {
6076 grouped_load = true;
6077 /* FORNOW */
6078 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
6079
6080 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6081
6082 /* If this is single-element interleaving with an element distance
6083 that leaves unused vector loads around, punt - we would at least create
6084 very sub-optimal code in that case (and blow up memory;
6085 see PR65518). */
6086 if (first_stmt == stmt
6087 && !GROUP_NEXT_ELEMENT (stmt_info)
6088 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6089 {
6090 if (dump_enabled_p ())
6091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6092 "single-element interleaving not supported "
6093 "for not adjacent vector loads\n");
6094 return false;
6095 }
6096
6097 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6098 slp_perm = true;
6099
6100 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6101 if (!slp
6102 && !PURE_SLP_STMT (stmt_info)
6103 && !STMT_VINFO_STRIDED_P (stmt_info))
6104 {
6105 if (vect_load_lanes_supported (vectype, group_size))
6106 load_lanes_p = true;
6107 else if (!vect_grouped_load_supported (vectype, group_size))
6108 return false;
6109 }
6110
6111 /* Invalidate assumptions made by dependence analysis when vectorization
6112 on the unrolled body effectively re-orders stmts. */
6113 if (!PURE_SLP_STMT (stmt_info)
6114 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6115 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6116 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6117 {
6118 if (dump_enabled_p ())
6119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6120 "cannot perform implicit CSE when performing "
6121 "group loads with negative dependence distance\n");
6122 return false;
6123 }
6124
6125 /* Similarly, when the stmt is a load that is both part of an SLP
6126 instance and a loop-vectorized stmt via the same-dr mechanism,
6127 we have to give up. */
6128 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6129 && (STMT_SLP_TYPE (stmt_info)
6130 != STMT_SLP_TYPE (vinfo_for_stmt
6131 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6132 {
6133 if (dump_enabled_p ())
6134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6135 "conflicting SLP types for CSEd load\n");
6136 return false;
6137 }
6138 }
6139
6140
6141 if (STMT_VINFO_GATHER_P (stmt_info))
6142 {
6143 gimple def_stmt;
6144 tree def;
6145 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
6146 &gather_off, &gather_scale);
6147 gcc_assert (gather_decl);
6148 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
6149 &def_stmt, &def, &gather_dt,
6150 &gather_off_vectype))
6151 {
6152 if (dump_enabled_p ())
6153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6154 "gather index use not simple.\n");
6155 return false;
6156 }
6157 }
6158 else if (STMT_VINFO_STRIDED_P (stmt_info))
6159 {
6160 if ((grouped_load
6161 && (slp || PURE_SLP_STMT (stmt_info)))
6162 && (group_size > nunits
6163 || nunits % group_size != 0))
6164 {
6165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6166 "unhandled strided group load\n");
6167 return false;
6168 }
6169 }
6170 else
6171 {
6172 negative = tree_int_cst_compare (nested_in_vect_loop
6173 ? STMT_VINFO_DR_STEP (stmt_info)
6174 : DR_STEP (dr),
6175 size_zero_node) < 0;
6176 if (negative && ncopies > 1)
6177 {
6178 if (dump_enabled_p ())
6179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6180 "multiple types with negative step.\n");
6181 return false;
6182 }
6183
6184 if (negative)
6185 {
6186 if (grouped_load)
6187 {
6188 if (dump_enabled_p ())
6189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6190 "negative step for group load not supported"
6191 "\n");
6192 return false;
6193 }
6194 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6195 if (alignment_support_scheme != dr_aligned
6196 && alignment_support_scheme != dr_unaligned_supported)
6197 {
6198 if (dump_enabled_p ())
6199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6200 "negative step but alignment required.\n");
6201 return false;
6202 }
6203 if (!perm_mask_for_reverse (vectype))
6204 {
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 "negative step and reversing not supported."
6208 "\n");
6209 return false;
6210 }
6211 }
6212 }
6213
6214 if (!vec_stmt) /* transformation not required. */
6215 {
6216 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6217 /* The SLP costs are calculated during SLP analysis. */
6218 if (!PURE_SLP_STMT (stmt_info))
6219 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6220 NULL, NULL, NULL);
6221 return true;
6222 }
6223
6224 if (dump_enabled_p ())
6225 dump_printf_loc (MSG_NOTE, vect_location,
6226 "transform load. ncopies = %d\n", ncopies);
6227
6228 /** Transform. **/
6229
6230 ensure_base_align (stmt_info, dr);
6231
6232 if (STMT_VINFO_GATHER_P (stmt_info))
6233 {
6234 tree vec_oprnd0 = NULL_TREE, op;
6235 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6236 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6237 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6238 edge pe = loop_preheader_edge (loop);
6239 gimple_seq seq;
6240 basic_block new_bb;
6241 enum { NARROW, NONE, WIDEN } modifier;
6242 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6243
6244 if (nunits == gather_off_nunits)
6245 modifier = NONE;
6246 else if (nunits == gather_off_nunits / 2)
6247 {
6248 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6249 modifier = WIDEN;
6250
6251 for (i = 0; i < gather_off_nunits; ++i)
6252 sel[i] = i | nunits;
6253
6254 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6255 }
6256 else if (nunits == gather_off_nunits * 2)
6257 {
6258 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6259 modifier = NARROW;
6260
6261 for (i = 0; i < nunits; ++i)
6262 sel[i] = i < gather_off_nunits
6263 ? i : i + nunits - gather_off_nunits;
6264
6265 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6266 ncopies *= 2;
6267 }
6268 else
6269 gcc_unreachable ();
6270
6271 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6272 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6273 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6274 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6275 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6276 scaletype = TREE_VALUE (arglist);
6277 gcc_checking_assert (types_compatible_p (srctype, rettype));
6278
6279 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6280
6281 ptr = fold_convert (ptrtype, gather_base);
6282 if (!is_gimple_min_invariant (ptr))
6283 {
6284 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6285 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6286 gcc_assert (!new_bb);
6287 }
6288
6289 /* Currently we support only unconditional gather loads,
6290 so mask should be all ones. */
6291 if (TREE_CODE (masktype) == INTEGER_TYPE)
6292 mask = build_int_cst (masktype, -1);
6293 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6294 {
6295 mask = build_int_cst (TREE_TYPE (masktype), -1);
6296 mask = build_vector_from_val (masktype, mask);
6297 mask = vect_init_vector (stmt, mask, masktype, NULL);
6298 }
6299 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6300 {
6301 REAL_VALUE_TYPE r;
6302 long tmp[6];
6303 for (j = 0; j < 6; ++j)
6304 tmp[j] = -1;
6305 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6306 mask = build_real (TREE_TYPE (masktype), r);
6307 mask = build_vector_from_val (masktype, mask);
6308 mask = vect_init_vector (stmt, mask, masktype, NULL);
6309 }
6310 else
6311 gcc_unreachable ();
6312
6313 scale = build_int_cst (scaletype, gather_scale);
6314
6315 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6316 merge = build_int_cst (TREE_TYPE (rettype), 0);
6317 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6318 {
6319 REAL_VALUE_TYPE r;
6320 long tmp[6];
6321 for (j = 0; j < 6; ++j)
6322 tmp[j] = 0;
6323 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6324 merge = build_real (TREE_TYPE (rettype), r);
6325 }
6326 else
6327 gcc_unreachable ();
6328 merge = build_vector_from_val (rettype, merge);
6329 merge = vect_init_vector (stmt, merge, rettype, NULL);
6330
6331 prev_stmt_info = NULL;
6332 for (j = 0; j < ncopies; ++j)
6333 {
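/* Compute the vector of gather offsets OP for this copy: the first copy
   vectorizes GATHER_OFF, later copies are obtained via
   vect_get_vec_def_for_stmt_copy, and in the widening case every second
   copy re-uses the upper half of the previous offset vector through a
   permute.  */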
6334 if (modifier == WIDEN && (j & 1))
6335 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6336 perm_mask, stmt, gsi);
6337 else if (j == 0)
6338 op = vec_oprnd0
6339 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6340 else
6341 op = vec_oprnd0
6342 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6343
6344 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6345 {
6346 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6347 == TYPE_VECTOR_SUBPARTS (idxtype));
6348 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6349 var = make_ssa_name (var);
6350 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6351 new_stmt
6352 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6354 op = var;
6355 }
6356
6357 new_stmt
6358 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6359
6360 if (!useless_type_conversion_p (vectype, rettype))
6361 {
6362 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6363 == TYPE_VECTOR_SUBPARTS (rettype));
6364 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6365 op = make_ssa_name (var, new_stmt);
6366 gimple_call_set_lhs (new_stmt, op);
6367 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6368 var = make_ssa_name (vec_dest);
6369 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6370 new_stmt
6371 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6372 }
6373 else
6374 {
6375 var = make_ssa_name (vec_dest, new_stmt);
6376 gimple_call_set_lhs (new_stmt, var);
6377 }
6378
6379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6380
6381 if (modifier == NARROW)
6382 {
6383 if ((j & 1) == 0)
6384 {
6385 prev_res = var;
6386 continue;
6387 }
6388 var = permute_vec_elements (prev_res, var,
6389 perm_mask, stmt, gsi);
6390 new_stmt = SSA_NAME_DEF_STMT (var);
6391 }
6392
6393 if (prev_stmt_info == NULL)
6394 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6395 else
6396 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6397 prev_stmt_info = vinfo_for_stmt (new_stmt);
6398 }
6399 return true;
6400 }
6401 else if (STMT_VINFO_STRIDED_P (stmt_info))
6402 {
6403 gimple_stmt_iterator incr_gsi;
6404 bool insert_after;
6405 gimple incr;
6406 tree offvar;
6407 tree ivstep;
6408 tree running_off;
6409 vec<constructor_elt, va_gc> *v = NULL;
6410 gimple_seq stmts = NULL;
6411 tree stride_base, stride_step, alias_off;
6412
6413 gcc_assert (!nested_in_vect_loop);
6414
6415 if (slp && grouped_load)
6416 first_dr = STMT_VINFO_DATA_REF
6417 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6418 else
6419 first_dr = dr;
6420
6421 stride_base
6422 = fold_build_pointer_plus
6423 (DR_BASE_ADDRESS (first_dr),
6424 size_binop (PLUS_EXPR,
6425 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6426 convert_to_ptrofftype (DR_INIT (first_dr))));
6427 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6428
6429 /* For a load with loop-invariant (but other than power-of-2)
6430 stride (i.e. not a grouped access) like so:
6431
6432 for (i = 0; i < n; i += stride)
6433 ... = array[i];
6434
6435 we generate a new induction variable and new accesses to
6436 form a new vector (or vectors, depending on ncopies):
6437
6438 for (j = 0; ; j += VF*stride)
6439 tmp1 = array[j];
6440 tmp2 = array[j + stride];
6441 ...
6442 vectemp = {tmp1, tmp2, ...}
6443 */
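/* As an illustrative sketch (assuming a hypothetical V4SI vectype with
   nloads == 4), the body generated below is roughly:

     tmp1 = array[j];
     tmp2 = array[j + stride];
     tmp3 = array[j + 2*stride];
     tmp4 = array[j + 3*stride];
     vectemp = {tmp1, tmp2, tmp3, tmp4};

   i.e. NLOADS scalar (or sub-vector) loads combined into one vector via a
   CONSTRUCTOR, with the running pointer bumped by STRIDE_STEP after each
   load.  */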
6444
6445 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6446 build_int_cst (TREE_TYPE (stride_step), vf));
6447
6448 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6449
6450 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6451 loop, &incr_gsi, insert_after,
6452 &offvar, NULL);
6453 incr = gsi_stmt (incr_gsi);
6454 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6455
6456 stride_step = force_gimple_operand (unshare_expr (stride_step),
6457 &stmts, true, NULL_TREE);
6458 if (stmts)
6459 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6460
6461 prev_stmt_info = NULL;
6462 running_off = offvar;
6463 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6464 int nloads = nunits;
6465 tree ltype = TREE_TYPE (vectype);
6466 auto_vec<tree> dr_chain;
6467 if (slp)
6468 {
6469 nloads = nunits / group_size;
6470 if (group_size < nunits)
6471 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6472 else
6473 ltype = vectype;
6474 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6475 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6476 if (slp_perm)
6477 dr_chain.create (ncopies);
6478 }
6479 for (j = 0; j < ncopies; j++)
6480 {
6481 tree vec_inv;
6482
6483 if (nloads > 1)
6484 {
6485 vec_alloc (v, nloads);
6486 for (i = 0; i < nloads; i++)
6487 {
6488 tree newref, newoff;
6489 gimple incr;
6490 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6491
6492 newref = force_gimple_operand_gsi (gsi, newref, true,
6493 NULL_TREE, true,
6494 GSI_SAME_STMT);
6495 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6496 newoff = copy_ssa_name (running_off);
6497 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6498 running_off, stride_step);
6499 vect_finish_stmt_generation (stmt, incr, gsi);
6500
6501 running_off = newoff;
6502 }
6503
6504 vec_inv = build_constructor (vectype, v);
6505 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6506 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6507 }
6508 else
6509 {
6510 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6511 build2 (MEM_REF, ltype,
6512 running_off, alias_off));
6513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6514
6515 tree newoff = copy_ssa_name (running_off);
6516 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6517 running_off, stride_step);
6518 vect_finish_stmt_generation (stmt, incr, gsi);
6519
6520 running_off = newoff;
6521 }
6522
6523 if (slp)
6524 {
6525 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6526 if (slp_perm)
6527 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6528 }
6529 else
6530 {
6531 if (j == 0)
6532 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6533 else
6534 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6535 prev_stmt_info = vinfo_for_stmt (new_stmt);
6536 }
6537 }
6538 if (slp_perm)
6539 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6540 slp_node_instance, false);
6541 return true;
6542 }
6543
6544 if (grouped_load)
6545 {
6546 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6547 if (slp
6548 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6549 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6550 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6551
6552 /* Check if the chain of loads is already vectorized. */
6553 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6554 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6555 ??? But we can only do so if there is exactly one
6556 as we have no way to get at the rest. Leave the CSE
6557 opportunity alone.
6558 ??? With the group load eventually participating
6559 in multiple different permutations (having multiple
6560 slp nodes which refer to the same group) the CSE
6561 is even wrong code. See PR56270. */
6562 && !slp)
6563 {
6564 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6565 return true;
6566 }
6567 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6568 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6569 group_gap_adj = 0;
6570
6571 /* VEC_NUM is the number of vect stmts to be created for this group. */
6572 if (slp)
6573 {
6574 grouped_load = false;
6575 /* For SLP permutation support we need to load the whole group,
6576 not only the number of vector stmts the permutation result
6577 fits in. */
6578 if (slp_perm)
6579 vec_num = (group_size * vf + nunits - 1) / nunits;
6580 else
6581 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6582 group_gap_adj = vf * group_size - nunits * vec_num;
6583 }
6584 else
6585 vec_num = group_size;
6586 }
6587 else
6588 {
6589 first_stmt = stmt;
6590 first_dr = dr;
6591 group_size = vec_num = 1;
6592 group_gap_adj = 0;
6593 }
6594
6595 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6596 gcc_assert (alignment_support_scheme);
6597 /* Targets with load-lane instructions must not require explicit
6598 realignment. */
6599 gcc_assert (!load_lanes_p
6600 || alignment_support_scheme == dr_aligned
6601 || alignment_support_scheme == dr_unaligned_supported);
6602
6603 /* In case the vectorization factor (VF) is bigger than the number
6604 of elements that we can fit in a vectype (nunits), we have to generate
6605 more than one vector stmt - i.e., we need to "unroll" the
6606 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6607 from one copy of the vector stmt to the next, in the field
6608 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6609 stages to find the correct vector defs to be used when vectorizing
6610 stmts that use the defs of the current stmt. The example below
6611 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6612 need to create 4 vectorized stmts):
6613
6614 before vectorization:
6615 RELATED_STMT VEC_STMT
6616 S1: x = memref - -
6617 S2: z = x + 1 - -
6618
6619 step 1: vectorize stmt S1:
6620 We first create the vector stmt VS1_0, and, as usual, record a
6621 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6622 Next, we create the vector stmt VS1_1, and record a pointer to
6623 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6624 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6625 stmts and pointers:
6626 RELATED_STMT VEC_STMT
6627 VS1_0: vx0 = memref0 VS1_1 -
6628 VS1_1: vx1 = memref1 VS1_2 -
6629 VS1_2: vx2 = memref2 VS1_3 -
6630 VS1_3: vx3 = memref3 - -
6631 S1: x = load - VS1_0
6632 S2: z = x + 1 - -
6633
6634 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6635 information we recorded in RELATED_STMT field is used to vectorize
6636 stmt S2. */
6637
6638 /* In case of interleaving (non-unit grouped access):
6639
6640 S1: x2 = &base + 2
6641 S2: x0 = &base
6642 S3: x1 = &base + 1
6643 S4: x3 = &base + 3
6644
6645 Vectorized loads are created in the order of memory accesses
6646 starting from the access of the first stmt of the chain:
6647
6648 VS1: vx0 = &base
6649 VS2: vx1 = &base + vec_size*1
6650 VS3: vx3 = &base + vec_size*2
6651 VS4: vx4 = &base + vec_size*3
6652
6653 Then permutation statements are generated:
6654
6655 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6656 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6657 ...
6658
6659 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6660 (the order of the data-refs in the output of vect_permute_load_chain
6661 corresponds to the order of scalar stmts in the interleaving chain - see
6662 the documentation of vect_permute_load_chain()).
6663 The generation of permutation stmts and recording them in
6664 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6665
6666 In case of both multiple types and interleaving, the vector loads and
6667 permutation stmts above are created for every copy. The result vector
6668 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6669 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
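/* For illustration (assuming two hypothetical V4SI vectors vx0 and vx1
   loaded from consecutive memory for a group of size 2), the permutation
   statements generated by vect_permute_load_chain would be analogous to:

     vy0 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >
     vy1 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >

   i.e. an even/odd extraction that hands each scalar stmt of the chain
   its de-interleaved vector.  */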
6670
6671 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6672 on a target that supports unaligned accesses (dr_unaligned_supported)
6673 we generate the following code:
6674 p = initial_addr;
6675 indx = 0;
6676 loop {
6677 p = p + indx * vectype_size;
6678 vec_dest = *(p);
6679 indx = indx + 1;
6680 }
6681
6682 Otherwise, the data reference is potentially unaligned on a target that
6683 does not support unaligned accesses (dr_explicit_realign_optimized) -
6684 then generate the following code, in which the data in each iteration is
6685 obtained by two vector loads, one from the previous iteration, and one
6686 from the current iteration:
6687 p1 = initial_addr;
6688 msq_init = *(floor(p1))
6689 p2 = initial_addr + VS - 1;
6690 realignment_token = call target_builtin;
6691 indx = 0;
6692 loop {
6693 p2 = p2 + indx * vectype_size
6694 lsq = *(floor(p2))
6695 vec_dest = realign_load (msq, lsq, realignment_token)
6696 indx = indx + 1;
6697 msq = lsq;
6698 } */
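/* As an added illustration (assuming hypothetical 16-byte vectors of
   4-byte elements, with initial_addr pointing at element a1 while the
   aligned vector at floor(p1) covers a0..a3), the realignment scheme
   above yields:

     msq = *(floor(p1)) = { a0, a1, a2, a3 }
     lsq = *(floor(p2)) = { a4, a5, a6, a7 }
     vec_dest = realign_load (msq, lsq, realignment_token)
              = { a1, a2, a3, a4 }

   and each following iteration re-uses the previous lsq as msq.  */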
6699
6700 /* If the misalignment remains the same throughout the execution of the
6701 loop, we can create the init_addr and permutation mask at the loop
6702 preheader. Otherwise, it needs to be created inside the loop.
6703 This can only occur when vectorizing memory accesses in the inner-loop
6704 nested within an outer-loop that is being vectorized. */
6705
6706 if (nested_in_vect_loop
6707 && (TREE_INT_CST_LOW (DR_STEP (dr))
6708 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6709 {
6710 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6711 compute_in_loop = true;
6712 }
6713
6714 if ((alignment_support_scheme == dr_explicit_realign_optimized
6715 || alignment_support_scheme == dr_explicit_realign)
6716 && !compute_in_loop)
6717 {
6718 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6719 alignment_support_scheme, NULL_TREE,
6720 &at_loop);
6721 if (alignment_support_scheme == dr_explicit_realign_optimized)
6722 {
6723 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6724 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6725 size_one_node);
6726 }
6727 }
6728 else
6729 at_loop = loop;
6730
6731 if (negative)
6732 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6733
6734 if (load_lanes_p)
6735 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6736 else
6737 aggr_type = vectype;
6738
6739 prev_stmt_info = NULL;
6740 for (j = 0; j < ncopies; j++)
6741 {
6742 /* 1. Create the vector or array pointer update chain. */
6743 if (j == 0)
6744 {
6745 bool simd_lane_access_p
6746 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6747 if (simd_lane_access_p
6748 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6749 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6750 && integer_zerop (DR_OFFSET (first_dr))
6751 && integer_zerop (DR_INIT (first_dr))
6752 && alias_sets_conflict_p (get_alias_set (aggr_type),
6753 get_alias_set (DR_REF (first_dr)))
6754 && (alignment_support_scheme == dr_aligned
6755 || alignment_support_scheme == dr_unaligned_supported))
6756 {
6757 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6758 dataref_offset = build_int_cst (reference_alias_ptr_type
6759 (DR_REF (first_dr)), 0);
6760 inv_p = false;
6761 }
6762 else
6763 dataref_ptr
6764 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6765 offset, &dummy, gsi, &ptr_incr,
6766 simd_lane_access_p, &inv_p,
6767 byte_offset);
6768 }
6769 else if (dataref_offset)
6770 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6771 TYPE_SIZE_UNIT (aggr_type));
6772 else
6773 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6774 TYPE_SIZE_UNIT (aggr_type));
6775
6776 if (grouped_load || slp_perm)
6777 dr_chain.create (vec_num);
6778
6779 if (load_lanes_p)
6780 {
6781 tree vec_array;
6782
6783 vec_array = create_vector_array (vectype, vec_num);
6784
6785 /* Emit:
6786 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6787 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6788 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6789 gimple_call_set_lhs (new_stmt, vec_array);
6790 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6791
6792 /* Extract each vector into an SSA_NAME. */
6793 for (i = 0; i < vec_num; i++)
6794 {
6795 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6796 vec_array, i);
6797 dr_chain.quick_push (new_temp);
6798 }
6799
6800 /* Record the mapping between SSA_NAMEs and statements. */
6801 vect_record_grouped_load_vectors (stmt, dr_chain);
6802 }
6803 else
6804 {
6805 for (i = 0; i < vec_num; i++)
6806 {
6807 if (i > 0)
6808 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6809 stmt, NULL_TREE);
6810
6811 /* 2. Create the vector-load in the loop. */
6812 switch (alignment_support_scheme)
6813 {
6814 case dr_aligned:
6815 case dr_unaligned_supported:
6816 {
6817 unsigned int align, misalign;
6818
6819 data_ref
6820 = fold_build2 (MEM_REF, vectype, dataref_ptr,
6821 dataref_offset
6822 ? dataref_offset
6823 : build_int_cst (reference_alias_ptr_type
6824 (DR_REF (first_dr)), 0));
6825 align = TYPE_ALIGN_UNIT (vectype);
6826 if (alignment_support_scheme == dr_aligned)
6827 {
6828 gcc_assert (aligned_access_p (first_dr));
6829 misalign = 0;
6830 }
6831 else if (DR_MISALIGNMENT (first_dr) == -1)
6832 {
6833 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6834 align = TYPE_ALIGN_UNIT (elem_type);
6835 else
6836 align = (get_object_alignment (DR_REF (first_dr))
6837 / BITS_PER_UNIT);
6838 misalign = 0;
6839 TREE_TYPE (data_ref)
6840 = build_aligned_type (TREE_TYPE (data_ref),
6841 align * BITS_PER_UNIT);
6842 }
6843 else
6844 {
6845 TREE_TYPE (data_ref)
6846 = build_aligned_type (TREE_TYPE (data_ref),
6847 TYPE_ALIGN (elem_type));
6848 misalign = DR_MISALIGNMENT (first_dr);
6849 }
6850 if (dataref_offset == NULL_TREE
6851 && TREE_CODE (dataref_ptr) == SSA_NAME)
6852 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6853 align, misalign);
6854 break;
6855 }
6856 case dr_explicit_realign:
6857 {
6858 tree ptr, bump;
6859
6860 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6861
6862 if (compute_in_loop)
6863 msq = vect_setup_realignment (first_stmt, gsi,
6864 &realignment_token,
6865 dr_explicit_realign,
6866 dataref_ptr, NULL);
6867
6868 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6869 ptr = copy_ssa_name (dataref_ptr);
6870 else
6871 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
6872 new_stmt = gimple_build_assign
6873 (ptr, BIT_AND_EXPR, dataref_ptr,
6874 build_int_cst
6875 (TREE_TYPE (dataref_ptr),
6876 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6878 data_ref
6879 = build2 (MEM_REF, vectype, ptr,
6880 build_int_cst (reference_alias_ptr_type
6881 (DR_REF (first_dr)), 0));
6882 vec_dest = vect_create_destination_var (scalar_dest,
6883 vectype);
6884 new_stmt = gimple_build_assign (vec_dest, data_ref);
6885 new_temp = make_ssa_name (vec_dest, new_stmt);
6886 gimple_assign_set_lhs (new_stmt, new_temp);
6887 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6888 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6889 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6890 msq = new_temp;
6891
6892 bump = size_binop (MULT_EXPR, vs,
6893 TYPE_SIZE_UNIT (elem_type));
6894 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6895 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6896 new_stmt = gimple_build_assign
6897 (NULL_TREE, BIT_AND_EXPR, ptr,
6898 build_int_cst
6899 (TREE_TYPE (ptr),
6900 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6901 ptr = copy_ssa_name (ptr, new_stmt);
6902 gimple_assign_set_lhs (new_stmt, ptr);
6903 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6904 data_ref
6905 = build2 (MEM_REF, vectype, ptr,
6906 build_int_cst (reference_alias_ptr_type
6907 (DR_REF (first_dr)), 0));
6908 break;
6909 }
6910 case dr_explicit_realign_optimized:
6911 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6912 new_temp = copy_ssa_name (dataref_ptr);
6913 else
6914 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
6915 new_stmt = gimple_build_assign
6916 (new_temp, BIT_AND_EXPR, dataref_ptr,
6917 build_int_cst
6918 (TREE_TYPE (dataref_ptr),
6919 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6920 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6921 data_ref
6922 = build2 (MEM_REF, vectype, new_temp,
6923 build_int_cst (reference_alias_ptr_type
6924 (DR_REF (first_dr)), 0));
6925 break;
6926 default:
6927 gcc_unreachable ();
6928 }
6929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6930 new_stmt = gimple_build_assign (vec_dest, data_ref);
6931 new_temp = make_ssa_name (vec_dest, new_stmt);
6932 gimple_assign_set_lhs (new_stmt, new_temp);
6933 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6934
6935 /* 3. Handle explicit realignment if necessary/supported.
6936 Create in loop:
6937 vec_dest = realign_load (msq, lsq, realignment_token) */
6938 if (alignment_support_scheme == dr_explicit_realign_optimized
6939 || alignment_support_scheme == dr_explicit_realign)
6940 {
6941 lsq = gimple_assign_lhs (new_stmt);
6942 if (!realignment_token)
6943 realignment_token = dataref_ptr;
6944 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6945 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6946 msq, lsq, realignment_token);
6947 new_temp = make_ssa_name (vec_dest, new_stmt);
6948 gimple_assign_set_lhs (new_stmt, new_temp);
6949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6950
6951 if (alignment_support_scheme == dr_explicit_realign_optimized)
6952 {
6953 gcc_assert (phi);
6954 if (i == vec_num - 1 && j == ncopies - 1)
6955 add_phi_arg (phi, lsq,
6956 loop_latch_edge (containing_loop),
6957 UNKNOWN_LOCATION);
6958 msq = lsq;
6959 }
6960 }
6961
6962 /* 4. Handle invariant-load. */
6963 if (inv_p && !bb_vinfo)
6964 {
6965 gcc_assert (!grouped_load);
6966 /* If we have versioned for aliasing or the loop doesn't
6967 have any data dependencies that would preclude this,
6968 then we are sure this is a loop invariant load and
6969 thus we can insert it on the preheader edge. */
6970 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6971 && !nested_in_vect_loop
6972 && hoist_defs_of_uses (stmt, loop))
6973 {
6974 if (dump_enabled_p ())
6975 {
6976 dump_printf_loc (MSG_NOTE, vect_location,
6977 "hoisting out of the vectorized "
6978 "loop: ");
6979 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6980 }
6981 tree tem = copy_ssa_name (scalar_dest);
6982 gsi_insert_on_edge_immediate
6983 (loop_preheader_edge (loop),
6984 gimple_build_assign (tem,
6985 unshare_expr
6986 (gimple_assign_rhs1 (stmt))));
6987 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6988 }
6989 else
6990 {
6991 gimple_stmt_iterator gsi2 = *gsi;
6992 gsi_next (&gsi2);
6993 new_temp = vect_init_vector (stmt, scalar_dest,
6994 vectype, &gsi2);
6995 }
6996 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6997 set_vinfo_for_stmt (new_stmt,
6998 new_stmt_vec_info (new_stmt, loop_vinfo,
6999 bb_vinfo));
7000 }
7001
7002 if (negative)
7003 {
7004 tree perm_mask = perm_mask_for_reverse (vectype);
7005 new_temp = permute_vec_elements (new_temp, new_temp,
7006 perm_mask, stmt, gsi);
7007 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7008 }
7009
7010 /* Collect vector loads and later create their permutation in
7011 vect_transform_grouped_load (). */
7012 if (grouped_load || slp_perm)
7013 dr_chain.quick_push (new_temp);
7014
7015 /* Store vector loads in the corresponding SLP_NODE. */
7016 if (slp && !slp_perm)
7017 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7018 }
7019 /* Bump the vector pointer to account for a gap or for excess
7020 elements loaded for a permuted SLP load. */
7021 if (group_gap_adj != 0)
7022 {
7023 bool ovf;
7024 tree bump
7025 = wide_int_to_tree (sizetype,
7026 wi::smul (TYPE_SIZE_UNIT (elem_type),
7027 group_gap_adj, &ovf));
7028 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7029 stmt, bump);
7030 }
7031 }
7032
7033 if (slp && !slp_perm)
7034 continue;
7035
7036 if (slp_perm)
7037 {
7038 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7039 slp_node_instance, false))
7040 {
7041 dr_chain.release ();
7042 return false;
7043 }
7044 }
7045 else
7046 {
7047 if (grouped_load)
7048 {
7049 if (!load_lanes_p)
7050 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7051 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7052 }
7053 else
7054 {
7055 if (j == 0)
7056 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7057 else
7058 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7059 prev_stmt_info = vinfo_for_stmt (new_stmt);
7060 }
7061 }
7062 dr_chain.release ();
7063 }
7064
7065 return true;
7066 }
7067
7068 /* Function vect_is_simple_cond.
7069
7070 Input:
7071 LOOP - the loop that is being vectorized.
7072 COND - Condition that is checked for simple use.
7073
7074 Output:
7075 *COMP_VECTYPE - the vector type for the comparison.
7076
7077 Returns whether a COND can be vectorized. Checks whether
7078 condition operands are supportable using vect_is_simple_use. */
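/* For example (an illustrative, hypothetical case): for COND "a_1 < b_2"
   where both operands are SSA names whose defs have vector type V4SI,
   both operands pass vect_is_simple_use_1 and *COMP_VECTYPE is set to
   V4SI.  */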
7079
7080 static bool
7081 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
7082 bb_vec_info bb_vinfo, tree *comp_vectype)
7083 {
7084 tree lhs, rhs;
7085 tree def;
7086 enum vect_def_type dt;
7087 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7088
7089 if (!COMPARISON_CLASS_P (cond))
7090 return false;
7091
7092 lhs = TREE_OPERAND (cond, 0);
7093 rhs = TREE_OPERAND (cond, 1);
7094
7095 if (TREE_CODE (lhs) == SSA_NAME)
7096 {
7097 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7098 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
7099 &lhs_def_stmt, &def, &dt, &vectype1))
7100 return false;
7101 }
7102 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7103 && TREE_CODE (lhs) != FIXED_CST)
7104 return false;
7105
7106 if (TREE_CODE (rhs) == SSA_NAME)
7107 {
7108 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7109 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
7110 &rhs_def_stmt, &def, &dt, &vectype2))
7111 return false;
7112 }
7113 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7114 && TREE_CODE (rhs) != FIXED_CST)
7115 return false;
7116
7117 *comp_vectype = vectype1 ? vectype1 : vectype2;
7118 return true;
7119 }
7120
7121 /* vectorizable_condition.
7122
7123 Check if STMT is a conditional modify expression that can be vectorized.
7124 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7125 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7126 at GSI.
7127
7128 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7129 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7130 the else clause if it is 2).
7131
7132 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
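/* As an illustrative sketch (hypothetical GIMPLE): a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is transformed below into

     vx = VEC_COND_EXPR < va < vb, vc, vd >;

   where va/vb are the vector defs of the comparison operands and vc/vd
   those of the then/else clauses.  */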
7133
7134 bool
7135 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
7136 gimple *vec_stmt, tree reduc_def, int reduc_index,
7137 slp_tree slp_node)
7138 {
7139 tree scalar_dest = NULL_TREE;
7140 tree vec_dest = NULL_TREE;
7141 tree cond_expr, then_clause, else_clause;
7142 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7143 tree comp_vectype = NULL_TREE;
7144 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7145 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7146 tree vec_compare, vec_cond_expr;
7147 tree new_temp;
7148 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7149 tree def;
7150 enum vect_def_type dt, dts[4];
7151 int ncopies;
7152 enum tree_code code;
7153 stmt_vec_info prev_stmt_info = NULL;
7154 int i, j;
7155 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7156 vec<tree> vec_oprnds0 = vNULL;
7157 vec<tree> vec_oprnds1 = vNULL;
7158 vec<tree> vec_oprnds2 = vNULL;
7159 vec<tree> vec_oprnds3 = vNULL;
7160 tree vec_cmp_type;
7161
7162 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7163 return false;
7164
7165 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7166 return false;
7167
7168 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7169 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7170 && reduc_def))
7171 return false;
7172
7173 /* FORNOW: not yet supported. */
7174 if (STMT_VINFO_LIVE_P (stmt_info))
7175 {
7176 if (dump_enabled_p ())
7177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7178 "value used after loop.\n");
7179 return false;
7180 }
7181
7182 /* Is vectorizable conditional operation? */
7183 if (!is_gimple_assign (stmt))
7184 return false;
7185
7186 code = gimple_assign_rhs_code (stmt);
7187
7188 if (code != COND_EXPR)
7189 return false;
7190
7191 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7192 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7193
7194 if (slp_node || PURE_SLP_STMT (stmt_info))
7195 ncopies = 1;
7196 else
7197 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7198
7199 gcc_assert (ncopies >= 1);
7200 if (reduc_index && ncopies > 1)
7201 return false; /* FORNOW */
7202
7203 cond_expr = gimple_assign_rhs1 (stmt);
7204 then_clause = gimple_assign_rhs2 (stmt);
7205 else_clause = gimple_assign_rhs3 (stmt);
7206
7207 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7208 &comp_vectype)
7209 || !comp_vectype)
7210 return false;
7211
7212 if (TREE_CODE (then_clause) == SSA_NAME)
7213 {
7214 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7215 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7216 &then_def_stmt, &def, &dt))
7217 return false;
7218 }
7219 else if (TREE_CODE (then_clause) != INTEGER_CST
7220 && TREE_CODE (then_clause) != REAL_CST
7221 && TREE_CODE (then_clause) != FIXED_CST)
7222 return false;
7223
7224 if (TREE_CODE (else_clause) == SSA_NAME)
7225 {
7226 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7227 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7228 &else_def_stmt, &def, &dt))
7229 return false;
7230 }
7231 else if (TREE_CODE (else_clause) != INTEGER_CST
7232 && TREE_CODE (else_clause) != REAL_CST
7233 && TREE_CODE (else_clause) != FIXED_CST)
7234 return false;
7235
7236 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7237 /* The result of a vector comparison should be a signed type. */
7238 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7239 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7240 if (vec_cmp_type == NULL_TREE)
7241 return false;
7242
7243 if (!vec_stmt)
7244 {
7245 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7246 return expand_vec_cond_expr_p (vectype, comp_vectype);
7247 }
7248
7249 /* Transform. */
7250
7251 if (!slp_node)
7252 {
7253 vec_oprnds0.create (1);
7254 vec_oprnds1.create (1);
7255 vec_oprnds2.create (1);
7256 vec_oprnds3.create (1);
7257 }
7258
7259 /* Handle def. */
7260 scalar_dest = gimple_assign_lhs (stmt);
7261 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7262
7263 /* Handle cond expr. */
7264 for (j = 0; j < ncopies; j++)
7265 {
7266 gassign *new_stmt = NULL;
7267 if (j == 0)
7268 {
7269 if (slp_node)
7270 {
7271 auto_vec<tree, 4> ops;
7272 auto_vec<vec<tree>, 4> vec_defs;
7273
7274 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7275 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7276 ops.safe_push (then_clause);
7277 ops.safe_push (else_clause);
7278 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7279 vec_oprnds3 = vec_defs.pop ();
7280 vec_oprnds2 = vec_defs.pop ();
7281 vec_oprnds1 = vec_defs.pop ();
7282 vec_oprnds0 = vec_defs.pop ();
7283
7284 ops.release ();
7285 vec_defs.release ();
7286 }
7287 else
7288 {
7289 gimple gtemp;
7290 vec_cond_lhs =
7291 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7292 stmt, NULL);
7293 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7294 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7295
7296 vec_cond_rhs =
7297 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7298 stmt, NULL);
7299 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7300 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7301 if (reduc_index == 1)
7302 vec_then_clause = reduc_def;
7303 else
7304 {
7305 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7306 stmt, NULL);
7307 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7308 NULL, &gtemp, &def, &dts[2]);
7309 }
7310 if (reduc_index == 2)
7311 vec_else_clause = reduc_def;
7312 else
7313 {
7314 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7315 stmt, NULL);
7316 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7317 NULL, &gtemp, &def, &dts[3]);
7318 }
7319 }
7320 }
7321 else
7322 {
7323 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7324 vec_oprnds0.pop ());
7325 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7326 vec_oprnds1.pop ());
7327 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7328 vec_oprnds2.pop ());
7329 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7330 vec_oprnds3.pop ());
7331 }
7332
7333 if (!slp_node)
7334 {
7335 vec_oprnds0.quick_push (vec_cond_lhs);
7336 vec_oprnds1.quick_push (vec_cond_rhs);
7337 vec_oprnds2.quick_push (vec_then_clause);
7338 vec_oprnds3.quick_push (vec_else_clause);
7339 }
7340
7341 /* Arguments are ready. Create the new vector stmt. */
7342 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7343 {
7344 vec_cond_rhs = vec_oprnds1[i];
7345 vec_then_clause = vec_oprnds2[i];
7346 vec_else_clause = vec_oprnds3[i];
7347
7348 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7349 vec_cond_lhs, vec_cond_rhs);
7350 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7351 vec_compare, vec_then_clause, vec_else_clause);
7352
7353 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7354 new_temp = make_ssa_name (vec_dest, new_stmt);
7355 gimple_assign_set_lhs (new_stmt, new_temp);
7356 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7357 if (slp_node)
7358 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7359 }
7360
7361 if (slp_node)
7362 continue;
7363
7364 if (j == 0)
7365 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7366 else
7367 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7368
7369 prev_stmt_info = vinfo_for_stmt (new_stmt);
7370 }
7371
7372 vec_oprnds0.release ();
7373 vec_oprnds1.release ();
7374 vec_oprnds2.release ();
7375 vec_oprnds3.release ();
7376
7377 return true;
7378 }
7379
7380
7381 /* Make sure the statement is vectorizable. */
7382
7383 bool
7384 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7385 {
7386 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7387 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7388 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7389 bool ok;
7390 tree scalar_type, vectype;
7391 gimple pattern_stmt;
7392 gimple_seq pattern_def_seq;
7393
7394 if (dump_enabled_p ())
7395 {
7396 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7397 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7398 }
7399
7400 if (gimple_has_volatile_ops (stmt))
7401 {
7402 if (dump_enabled_p ())
7403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7404 "not vectorized: stmt has volatile operands\n");
7405
7406 return false;
7407 }
7408
7409 /* Skip stmts that do not need to be vectorized. In loops this is expected
7410 to include:
7411 - the COND_EXPR which is the loop exit condition
7412 - any LABEL_EXPRs in the loop
7413 - computations that are used only for array indexing or loop control.
7414 In basic blocks we only analyze statements that are a part of some SLP
7415 instance; therefore, all the statements are relevant.
7416
7417 A pattern statement needs to be analyzed instead of the original statement
7418 if the original statement is not relevant. Otherwise, we analyze both
7419 statements. In basic blocks we are called from some SLP instance
7420 traversal, so we don't analyze pattern stmts instead; the pattern stmts
7421 will already be part of the SLP instance. */
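/* For instance (a hypothetical example), a widening multiplication
   recognized by the pattern recognizer:

     original stmt:  prod_1 = (int) s_2 * (int) t_3;
     pattern stmt:   patt_4 = WIDEN_MULT_EXPR <s_2, t_3>;

   If only the pattern stmt is relevant we analyze it in place of the
   original; if both are relevant, both are analyzed.  */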
7422
7423 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7424 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7425 && !STMT_VINFO_LIVE_P (stmt_info))
7426 {
7427 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7428 && pattern_stmt
7429 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7430 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7431 {
7432 /* Analyze PATTERN_STMT instead of the original stmt. */
7433 stmt = pattern_stmt;
7434 stmt_info = vinfo_for_stmt (pattern_stmt);
7435 if (dump_enabled_p ())
7436 {
7437 dump_printf_loc (MSG_NOTE, vect_location,
7438 "==> examining pattern statement: ");
7439 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7440 }
7441 }
7442 else
7443 {
7444 if (dump_enabled_p ())
7445 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7446
7447 return true;
7448 }
7449 }
7450 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7451 && node == NULL
7452 && pattern_stmt
7453 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7454 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7455 {
7456 /* Analyze PATTERN_STMT too. */
7457 if (dump_enabled_p ())
7458 {
7459 dump_printf_loc (MSG_NOTE, vect_location,
7460 "==> examining pattern statement: ");
7461 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7462 }
7463
7464 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7465 return false;
7466 }
7467
7468 if (is_pattern_stmt_p (stmt_info)
7469 && node == NULL
7470 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7471 {
7472 gimple_stmt_iterator si;
7473
7474 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7475 {
7476 gimple pattern_def_stmt = gsi_stmt (si);
7477 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7478 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7479 {
7480 /* Analyze def stmt of STMT if it's a pattern stmt. */
7481 if (dump_enabled_p ())
7482 {
7483 dump_printf_loc (MSG_NOTE, vect_location,
7484 "==> examining pattern def statement: ");
7485 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7486 }
7487
7488 if (!vect_analyze_stmt (pattern_def_stmt,
7489 need_to_vectorize, node))
7490 return false;
7491 }
7492 }
7493 }
7494
7495 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7496 {
7497 case vect_internal_def:
7498 break;
7499
7500 case vect_reduction_def:
7501 case vect_nested_cycle:
7502 gcc_assert (!bb_vinfo
7503 && (relevance == vect_used_in_outer
7504 || relevance == vect_used_in_outer_by_reduction
7505 || relevance == vect_used_by_reduction
7506 || relevance == vect_unused_in_scope));
7507 break;
7508
7509 case vect_induction_def:
7510 case vect_constant_def:
7511 case vect_external_def:
7512 case vect_unknown_def_type:
7513 default:
7514 gcc_unreachable ();
7515 }
7516
7517 if (bb_vinfo)
7518 {
7519 gcc_assert (PURE_SLP_STMT (stmt_info));
7520
7521 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7522 if (dump_enabled_p ())
7523 {
7524 dump_printf_loc (MSG_NOTE, vect_location,
7525 "get vectype for scalar type: ");
7526 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7527 dump_printf (MSG_NOTE, "\n");
7528 }
7529
7530 vectype = get_vectype_for_scalar_type (scalar_type);
7531 if (!vectype)
7532 {
7533 if (dump_enabled_p ())
7534 {
7535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7536 "not SLPed: unsupported data-type ");
7537 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7538 scalar_type);
7539 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7540 }
7541 return false;
7542 }
7543
7544 if (dump_enabled_p ())
7545 {
7546 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7547 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7548 dump_printf (MSG_NOTE, "\n");
7549 }
7550
7551 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7552 }
7553
7554 if (STMT_VINFO_RELEVANT_P (stmt_info))
7555 {
7556 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7557 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7558 || (is_gimple_call (stmt)
7559 && gimple_call_lhs (stmt) == NULL_TREE));
7560 *need_to_vectorize = true;
7561 }
7562
7563 if (PURE_SLP_STMT (stmt_info) && !node)
7564 {
7565 dump_printf_loc (MSG_NOTE, vect_location,
7566 "handled only by SLP analysis\n");
7567 return true;
7568 }
7569
7570 ok = true;
7571 if (!bb_vinfo
7572 && (STMT_VINFO_RELEVANT_P (stmt_info)
7573 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7574 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7575 || vectorizable_conversion (stmt, NULL, NULL, node)
7576 || vectorizable_shift (stmt, NULL, NULL, node)
7577 || vectorizable_operation (stmt, NULL, NULL, node)
7578 || vectorizable_assignment (stmt, NULL, NULL, node)
7579 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7580 || vectorizable_call (stmt, NULL, NULL, node)
7581 || vectorizable_store (stmt, NULL, NULL, node)
7582 || vectorizable_reduction (stmt, NULL, NULL, node)
7583 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7584 else
7585 {
7586 if (bb_vinfo)
7587 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7588 || vectorizable_conversion (stmt, NULL, NULL, node)
7589 || vectorizable_shift (stmt, NULL, NULL, node)
7590 || vectorizable_operation (stmt, NULL, NULL, node)
7591 || vectorizable_assignment (stmt, NULL, NULL, node)
7592 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7593 || vectorizable_call (stmt, NULL, NULL, node)
7594 || vectorizable_store (stmt, NULL, NULL, node)
7595 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7596 }
7597
7598 if (!ok)
7599 {
7600 if (dump_enabled_p ())
7601 {
7602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7603 "not vectorized: relevant stmt not ");
7604 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7605 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7606 }
7607
7608 return false;
7609 }
7610
7611 if (bb_vinfo)
7612 return true;
7613
7614 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7615 need extra handling, except for vectorizable reductions. */
7616 if (STMT_VINFO_LIVE_P (stmt_info)
7617 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7618 ok = vectorizable_live_operation (stmt, NULL, NULL);
7619
7620 if (!ok)
7621 {
7622 if (dump_enabled_p ())
7623 {
7624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7625 "not vectorized: live stmt not ");
7626 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7627 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7628 }
7629
7630 return false;
7631 }
7632
7633 return true;
7634 }
7635
7636
7637 /* Function vect_transform_stmt.
7638
7639 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7640
7641 bool
7642 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7643 bool *grouped_store, slp_tree slp_node,
7644 slp_instance slp_node_instance)
7645 {
7646 bool is_store = false;
7647 gimple vec_stmt = NULL;
7648 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7649 bool done;
7650
7651 gimple old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7652
7653 switch (STMT_VINFO_TYPE (stmt_info))
7654 {
7655 case type_demotion_vec_info_type:
7656 case type_promotion_vec_info_type:
7657 case type_conversion_vec_info_type:
7658 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7659 gcc_assert (done);
7660 break;
7661
7662 case induc_vec_info_type:
7663 gcc_assert (!slp_node);
7664 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7665 gcc_assert (done);
7666 break;
7667
7668 case shift_vec_info_type:
7669 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7670 gcc_assert (done);
7671 break;
7672
7673 case op_vec_info_type:
7674 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7675 gcc_assert (done);
7676 break;
7677
7678 case assignment_vec_info_type:
7679 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7680 gcc_assert (done);
7681 break;
7682
7683 case load_vec_info_type:
7684 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7685 slp_node_instance);
7686 gcc_assert (done);
7687 break;
7688
7689 case store_vec_info_type:
7690 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7691 gcc_assert (done);
7692 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7693 {
7694 /* In case of interleaving, the whole chain is vectorized when the
7695 last store in the chain is reached. Store stmts before the last
7696 one are skipped, and their stmt_vec_info shouldn't be freed
7697 meanwhile. */
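	    /* E.g. (illustrative) for an interleaved pair of stores
	         a[2*i] = x;  a[2*i+1] = y;
	       only reaching the second store of the group triggers
	       generation of the vector stores for the whole chain.  */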
7698 *grouped_store = true;
7699 if (STMT_VINFO_VEC_STMT (stmt_info))
7700 is_store = true;
7701 }
7702 else
7703 is_store = true;
7704 break;
7705
7706 case condition_vec_info_type:
7707 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7708 gcc_assert (done);
7709 break;
7710
7711 case call_vec_info_type:
7712 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7713 stmt = gsi_stmt (*gsi);
7714 if (is_gimple_call (stmt)
7715 && gimple_call_internal_p (stmt)
7716 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7717 is_store = true;
7718 break;
7719
7720 case call_simd_clone_vec_info_type:
7721 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7722 stmt = gsi_stmt (*gsi);
7723 break;
7724
7725 case reduc_vec_info_type:
7726 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7727 gcc_assert (done);
7728 break;
7729
7730 default:
7731 if (!STMT_VINFO_LIVE_P (stmt_info))
7732 {
7733 if (dump_enabled_p ())
7734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7735 "stmt not supported.\n");
7736 gcc_unreachable ();
7737 }
7738 }
7739
7740 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
7741 This would break hybrid SLP vectorization. */
7742 if (slp_node)
7743 gcc_assert (!vec_stmt
7744 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
7745
7746 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7747 is being vectorized, but outside the immediately enclosing loop. */
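  /* For example (hypothetical), during outer-loop vectorization of

       for (i = 0; i < n; i++)
         {
           for (j = 0; j < m; j++)
             t = a[i][j];        <-- inner-loop def
           b[i] = t;             <-- use in the outer loop
         }

     the vectorized def created for T inside the inner loop is recorded
     on the inner-loop-exit phi so the outer-loop use can find it.  */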
7748 if (vec_stmt
7749 && STMT_VINFO_LOOP_VINFO (stmt_info)
7750 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7751 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7752 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7753 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7754 || STMT_VINFO_RELEVANT (stmt_info) ==
7755 vect_used_in_outer_by_reduction))
7756 {
7757 struct loop *innerloop = LOOP_VINFO_LOOP (
7758 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7759 imm_use_iterator imm_iter;
7760 use_operand_p use_p;
7761 tree scalar_dest;
7762 gimple exit_phi;
7763
7764 if (dump_enabled_p ())
7765 dump_printf_loc (MSG_NOTE, vect_location,
7766 "Record the vdef for outer-loop vectorization.\n");
7767
7768 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7769 (to be used when vectorizing outer-loop stmts that use the DEF of
7770 STMT). */
7771 if (gimple_code (stmt) == GIMPLE_PHI)
7772 scalar_dest = PHI_RESULT (stmt);
7773 else
7774 scalar_dest = gimple_assign_lhs (stmt);
7775
7776 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7777 {
7778 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7779 {
7780 exit_phi = USE_STMT (use_p);
7781 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7782 }
7783 }
7784 }
7785
7786 /* Handle stmts whose DEF is used outside the loop-nest that is
7787 being vectorized. */
7788 if (STMT_VINFO_LIVE_P (stmt_info)
7789 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7790 {
7791 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7792 gcc_assert (done);
7793 }
7794
7795 if (vec_stmt)
7796 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7797
7798 return is_store;
7799 }
7800
7801
7802 /* Remove a group of stores (for SLP or interleaving), free their
7803 stmt_vec_info. */
7804
7805 void
7806 vect_remove_stores (gimple first_stmt)
7807 {
7808 gimple next = first_stmt;
7809 gimple tmp;
7810 gimple_stmt_iterator next_si;
7811
7812 while (next)
7813 {
7814 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7815
7816 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7817 if (is_pattern_stmt_p (stmt_info))
7818 next = STMT_VINFO_RELATED_STMT (stmt_info);
7819 /* Free the attached stmt_vec_info and remove the stmt. */
7820 next_si = gsi_for_stmt (next);
7821 unlink_stmt_vdef (next);
7822 gsi_remove (&next_si, true);
7823 release_defs (next);
7824 free_stmt_vec_info (next);
7825 next = tmp;
7826 }
7827 }
7828
7829
7830 /* Function new_stmt_vec_info.
7831
7832 Create and initialize a new stmt_vec_info struct for STMT. */
7833
7834 stmt_vec_info
7835 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7836 bb_vec_info bb_vinfo)
7837 {
7838 stmt_vec_info res;
7839 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7840
7841 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7842 STMT_VINFO_STMT (res) = stmt;
7843 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7844 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7845 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7846 STMT_VINFO_LIVE_P (res) = false;
7847 STMT_VINFO_VECTYPE (res) = NULL;
7848 STMT_VINFO_VEC_STMT (res) = NULL;
7849 STMT_VINFO_VECTORIZABLE (res) = true;
7850 STMT_VINFO_IN_PATTERN_P (res) = false;
7851 STMT_VINFO_RELATED_STMT (res) = NULL;
7852 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7853 STMT_VINFO_DATA_REF (res) = NULL;
7854
7855 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7856 STMT_VINFO_DR_OFFSET (res) = NULL;
7857 STMT_VINFO_DR_INIT (res) = NULL;
7858 STMT_VINFO_DR_STEP (res) = NULL;
7859 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7860
7861 if (gimple_code (stmt) == GIMPLE_PHI
7862 && is_loop_header_bb_p (gimple_bb (stmt)))
7863 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7864 else
7865 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7866
7867 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7868 STMT_SLP_TYPE (res) = loop_vect;
7869 GROUP_FIRST_ELEMENT (res) = NULL;
7870 GROUP_NEXT_ELEMENT (res) = NULL;
7871 GROUP_SIZE (res) = 0;
7872 GROUP_STORE_COUNT (res) = 0;
7873 GROUP_GAP (res) = 0;
7874 GROUP_SAME_DR_STMT (res) = NULL;
7875
7876 return res;
7877 }
7878
7879
7880 /* Create a vector for stmt_vec_info. */
7881
7882 void
7883 init_stmt_vec_info_vec (void)
7884 {
7885 gcc_assert (!stmt_vec_info_vec.exists ());
7886 stmt_vec_info_vec.create (50);
7887 }
7888
7889
7890 /* Free the vector for stmt_vec_info. */
7891
7892 void
7893 free_stmt_vec_info_vec (void)
7894 {
7895 unsigned int i;
7896 vec_void_p info;
7897 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7898 if (info != NULL)
7899 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7900 gcc_assert (stmt_vec_info_vec.exists ());
7901 stmt_vec_info_vec.release ();
7902 }
7903
7904
7905 /* Free stmt vectorization related info. */
7906
7907 void
7908 free_stmt_vec_info (gimple stmt)
7909 {
7910 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7911
7912 if (!stmt_info)
7913 return;
7914
7915 /* Check if this statement has a related "pattern stmt"
7916 (introduced by the vectorizer during the pattern recognition
7917 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7918 too. */
7919 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7920 {
7921 stmt_vec_info patt_info
7922 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7923 if (patt_info)
7924 {
7925 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7926 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7927 gimple_set_bb (patt_stmt, NULL);
7928 tree lhs = gimple_get_lhs (patt_stmt);
7929 if (TREE_CODE (lhs) == SSA_NAME)
7930 release_ssa_name (lhs);
7931 if (seq)
7932 {
7933 gimple_stmt_iterator si;
7934 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7935 {
7936 gimple seq_stmt = gsi_stmt (si);
7937 gimple_set_bb (seq_stmt, NULL);
7938 lhs = gimple_get_lhs (seq_stmt);
7939 if (TREE_CODE (lhs) == SSA_NAME)
7940 release_ssa_name (lhs);
7941 free_stmt_vec_info (seq_stmt);
7942 }
7943 }
7944 free_stmt_vec_info (patt_stmt);
7945 }
7946 }
7947
7948 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7949 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7950 set_vinfo_for_stmt (stmt, NULL);
7951 free (stmt_info);
7952 }
7953
7954
7955 /* Function get_vectype_for_scalar_type_and_size.
7956
7957 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7958 by the target. */
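/* For example (illustrative only): with SCALAR_TYPE == int (4 bytes)
   and SIZE == 16 this yields "vector(4) int" on a target that supports
   128-bit vector modes; with SIZE == 0 the target's preferred SIMD
   mode for SImode is used instead.  */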
7959
7960 static tree
7961 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7962 {
7963 machine_mode inner_mode = TYPE_MODE (scalar_type);
7964 machine_mode simd_mode;
7965 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7966 int nunits;
7967 tree vectype;
7968
7969 if (nbytes == 0)
7970 return NULL_TREE;
7971
7972 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7973 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7974 return NULL_TREE;
7975
7976 /* For vector types of elements whose mode precision doesn't
7977 match their type's precision we use an element type of mode
7978 precision. The vectorization routines will have to make sure
7979 they support the proper result truncation/extension.
7980 We also make sure to build vector types with INTEGER_TYPE
7981 component type only. */
7982 if (INTEGRAL_TYPE_P (scalar_type)
7983 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7984 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7985 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7986 TYPE_UNSIGNED (scalar_type));
7987
7988 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7989 When the component mode passes the above test simply use a type
7990 corresponding to that mode. The theory is that any use that
7991 would cause problems with this will disable vectorization anyway. */
7992 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7993 && !INTEGRAL_TYPE_P (scalar_type))
7994 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7995
7996 /* We can't build a vector type of elements with alignment bigger than
7997 their size. */
7998 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7999 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8000 TYPE_UNSIGNED (scalar_type));
8001
8002 /* If we fell back to using the mode, fail if there was
8003 no scalar type for it. */
8004 if (scalar_type == NULL_TREE)
8005 return NULL_TREE;
8006
8007 /* If no size was supplied use the mode the target prefers. Otherwise
8008 lookup a vector mode of the specified size. */
8009 if (size == 0)
8010 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8011 else
8012 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8013 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8014 if (nunits <= 1)
8015 return NULL_TREE;
8016
8017 vectype = build_vector_type (scalar_type, nunits);
8018
8019 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8020 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8021 return NULL_TREE;
8022
8023 return vectype;
8024 }
8025
8026 unsigned int current_vector_size;
8027
8028 /* Function get_vectype_for_scalar_type.
8029
8030 Returns the vector type corresponding to SCALAR_TYPE as supported
8031 by the target. */
8032
8033 tree
8034 get_vectype_for_scalar_type (tree scalar_type)
8035 {
8036 tree vectype;
8037 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8038 current_vector_size);
8039 if (vectype
8040 && current_vector_size == 0)
8041 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8042 return vectype;
8043 }
8044
8045 /* Function get_same_sized_vectype
8046
8047 Returns a vector type corresponding to SCALAR_TYPE of size
8048 VECTOR_TYPE if supported by the target. */
8049
8050 tree
8051 get_same_sized_vectype (tree scalar_type, tree vector_type)
8052 {
8053 return get_vectype_for_scalar_type_and_size
8054 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8055 }
8056
8057 /* Function vect_is_simple_use.
8058
8059 Input:
8060 LOOP_VINFO - the vect info of the loop that is being vectorized.
8061 BB_VINFO - the vect info of the basic block that is being vectorized.
8062 OPERAND - operand of STMT in the loop or bb.
8063 DEF - the defining stmt in case OPERAND is an SSA_NAME.
8064
8065 Returns whether a stmt with OPERAND can be vectorized.
8066 For loops, supportable operands are constants, loop invariants, and operands
8067 that are defined by the current iteration of the loop. Unsupportable
8068 operands are those that are defined by a previous iteration of the loop (as
8069 is the case in reduction/induction computations).
8070 For basic blocks, supportable operands are constants and bb invariants.
8071 For now, operands defined outside the basic block are not supported. */
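/* An illustrative classification (not exhaustive): in

     for (i = 0; i < n; i++)
       a[i] = b[i] * k + 4;

   the constant 4 is vect_constant_def, the loop invariant K defined
   before the loop is vect_external_def, and the SSA_NAME holding
   b[i] * k, defined inside the loop, is vect_internal_def.  */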
8072
8073 bool
8074 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8075 bb_vec_info bb_vinfo, gimple *def_stmt,
8076 tree *def, enum vect_def_type *dt)
8077 {
8078 *def_stmt = NULL;
8079 *def = NULL_TREE;
8080 *dt = vect_unknown_def_type;
8081
8082 if (dump_enabled_p ())
8083 {
8084 dump_printf_loc (MSG_NOTE, vect_location,
8085 "vect_is_simple_use: operand ");
8086 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8087 dump_printf (MSG_NOTE, "\n");
8088 }
8089
8090 if (CONSTANT_CLASS_P (operand))
8091 {
8092 *dt = vect_constant_def;
8093 return true;
8094 }
8095
8096 if (is_gimple_min_invariant (operand))
8097 {
8098 *def = operand;
8099 *dt = vect_external_def;
8100 return true;
8101 }
8102
8103 if (TREE_CODE (operand) != SSA_NAME)
8104 {
8105 if (dump_enabled_p ())
8106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8107 "not ssa-name.\n");
8108 return false;
8109 }
8110
8111 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8112 {
8113 *def = operand;
8114 *dt = vect_external_def;
8115 return true;
8116 }
8117
8118 *def_stmt = SSA_NAME_DEF_STMT (operand);
8119 if (dump_enabled_p ())
8120 {
8121 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8122 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8123 }
8124
8125 basic_block bb = gimple_bb (*def_stmt);
8126 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
8127 || (bb_vinfo
8128 && (bb != BB_VINFO_BB (bb_vinfo)
8129 || gimple_code (*def_stmt) == GIMPLE_PHI)))
8130 *dt = vect_external_def;
8131 else
8132 {
8133 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8134 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
8135 *dt = vect_external_def;
8136 else
8137 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8138 }
8139
8140 if (dump_enabled_p ())
8141 {
8142 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8143 switch (*dt)
8144 {
8145 case vect_uninitialized_def:
8146 dump_printf (MSG_NOTE, "uninitialized\n");
8147 break;
8148 case vect_constant_def:
8149 dump_printf (MSG_NOTE, "constant\n");
8150 break;
8151 case vect_external_def:
8152 dump_printf (MSG_NOTE, "external\n");
8153 break;
8154 case vect_internal_def:
8155 dump_printf (MSG_NOTE, "internal\n");
8156 break;
8157 case vect_induction_def:
8158 dump_printf (MSG_NOTE, "induction\n");
8159 break;
8160 case vect_reduction_def:
8161 dump_printf (MSG_NOTE, "reduction\n");
8162 break;
8163 case vect_double_reduction_def:
8164 dump_printf (MSG_NOTE, "double reduction\n");
8165 break;
8166 case vect_nested_cycle:
8167 dump_printf (MSG_NOTE, "nested cycle\n");
8168 break;
8169 case vect_unknown_def_type:
8170 dump_printf (MSG_NOTE, "unknown\n");
8171 break;
8172 }
8173 }
8174
8175 if (*dt == vect_unknown_def_type
8176 || (stmt
8177 && *dt == vect_double_reduction_def
8178 && gimple_code (stmt) != GIMPLE_PHI))
8179 {
8180 if (dump_enabled_p ())
8181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8182 "Unsupported pattern.\n");
8183 return false;
8184 }
8185
8186 switch (gimple_code (*def_stmt))
8187 {
8188 case GIMPLE_PHI:
8189 *def = gimple_phi_result (*def_stmt);
8190 break;
8191
8192 case GIMPLE_ASSIGN:
8193 *def = gimple_assign_lhs (*def_stmt);
8194 break;
8195
8196 case GIMPLE_CALL:
8197 *def = gimple_call_lhs (*def_stmt);
8198 if (*def != NULL)
8199 break;
8200 /* FALLTHRU */
8201 default:
8202 if (dump_enabled_p ())
8203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8204 "unsupported defining stmt:\n");
8205 return false;
8206 }
8207
8208 return true;
8209 }
8210
8211 /* Function vect_is_simple_use_1.
8212
8213 Same as vect_is_simple_use but also determines the vector operand
8214 type of OPERAND and stores it to *VECTYPE. If the definition of
8215 OPERAND is vect_uninitialized_def, vect_constant_def or
8216 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8217 is responsible for computing the best suited vector type for the
8218 scalar operand. */
8219
8220 bool
8221 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8222 bb_vec_info bb_vinfo, gimple *def_stmt,
8223 tree *def, enum vect_def_type *dt, tree *vectype)
8224 {
8225 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8226 def, dt))
8227 return false;
8228
8229 /* Now get a vector type if the def is internal, otherwise supply
8230 NULL_TREE and leave it up to the caller to figure out a proper
8231 type for the use stmt. */
8232 if (*dt == vect_internal_def
8233 || *dt == vect_induction_def
8234 || *dt == vect_reduction_def
8235 || *dt == vect_double_reduction_def
8236 || *dt == vect_nested_cycle)
8237 {
8238 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8239
8240 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8241 && !STMT_VINFO_RELEVANT (stmt_info)
8242 && !STMT_VINFO_LIVE_P (stmt_info))
8243 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8244
8245 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8246 gcc_assert (*vectype != NULL_TREE);
8247 }
8248 else if (*dt == vect_uninitialized_def
8249 || *dt == vect_constant_def
8250 || *dt == vect_external_def)
8251 *vectype = NULL_TREE;
8252 else
8253 gcc_unreachable ();
8254
8255 return true;
8256 }
8257
8258
8259 /* Function supportable_widening_operation
8260
8261 Check whether an operation represented by the code CODE is a
8262 widening operation that is supported by the target platform in
8263 vector form (i.e., when operating on arguments of type VECTYPE_IN
8264 producing a result of type VECTYPE_OUT).
8265
8266 Widening operations we currently support are NOP (CONVERT), FLOAT
8267 and WIDEN_MULT. This function checks if these operations are supported
8268 by the target platform either directly (via vector tree-codes), or via
8269 target builtins.
8270
8271 Output:
8272 - CODE1 and CODE2 are codes of vector operations to be used when
8273 vectorizing the operation, if available.
8274 - MULTI_STEP_CVT determines the number of required intermediate steps in
8275 case of multi-step conversion (like char->short->int - in that case
8276 MULTI_STEP_CVT will be 1).
8277 - INTERM_TYPES contains the intermediate type required to perform the
8278 widening operation (short in the above example). */
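/* Illustrative sketch of a multi-step case: for a char -> int widening
   where the target only provides char -> short and short -> int unpack
   optabs, the conversion goes through an intermediate short vector
   type; *MULTI_STEP_CVT is set to 1 and INTERM_TYPES holds that short
   vector type.  */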
8279
8280 bool
8281 supportable_widening_operation (enum tree_code code, gimple stmt,
8282 tree vectype_out, tree vectype_in,
8283 enum tree_code *code1, enum tree_code *code2,
8284 int *multi_step_cvt,
8285 vec<tree> *interm_types)
8286 {
8287 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8288 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8289 struct loop *vect_loop = NULL;
8290 machine_mode vec_mode;
8291 enum insn_code icode1, icode2;
8292 optab optab1, optab2;
8293 tree vectype = vectype_in;
8294 tree wide_vectype = vectype_out;
8295 enum tree_code c1, c2;
8296 int i;
8297 tree prev_type, intermediate_type;
8298 machine_mode intermediate_mode, prev_mode;
8299 optab optab3, optab4;
8300
8301 *multi_step_cvt = 0;
8302 if (loop_info)
8303 vect_loop = LOOP_VINFO_LOOP (loop_info);
8304
8305 switch (code)
8306 {
8307 case WIDEN_MULT_EXPR:
8308 /* The result of a vectorized widening operation usually requires
8309 two vectors (because the widened results do not fit into one vector).
8310 The generated vector results would normally be expected to be
8311 generated in the same order as in the original scalar computation,
8312 i.e. if 8 results are generated in each vector iteration, they are
8313 to be organized as follows:
8314 vect1: [res1,res2,res3,res4],
8315 vect2: [res5,res6,res7,res8].
8316
8317 However, in the special case that the result of the widening
8318 operation is used in a reduction computation only, the order doesn't
8319 matter (because when vectorizing a reduction we change the order of
8320 the computation). Some targets can take advantage of this and
8321 generate more efficient code. For example, targets like Altivec,
8322 that support widen_mult using a sequence of {mult_even,mult_odd}
8323 generate the following vectors:
8324 vect1: [res1,res3,res5,res7],
8325 vect2: [res2,res4,res6,res8].
8326
8327 When vectorizing outer-loops, we execute the inner-loop sequentially
8328 (each vectorized inner-loop iteration contributes to VF outer-loop
8329 iterations in parallel). We therefore don't allow changing the
8330 order of the computation in the inner-loop during outer-loop
8331 vectorization. */
8332 /* TODO: Another case in which order doesn't *really* matter is when we
8333 widen and then contract again, e.g. (short)((int)x * y >> 8).
8334 Normally, pack_trunc performs an even/odd permute, whereas the
8335 repack from an even/odd expansion would be an interleave, which
8336 would be significantly simpler for e.g. AVX2. */
8337 /* In any case, in order to avoid duplicating the code below, recurse
8338 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8339 are properly set up for the caller. If we fail, we'll continue with
8340 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8341 if (vect_loop
8342 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8343 && !nested_in_vect_loop_p (vect_loop, stmt)
8344 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8345 stmt, vectype_out, vectype_in,
8346 code1, code2, multi_step_cvt,
8347 interm_types))
8348 {
8349 /* Elements in a vector with vect_used_by_reduction property cannot
8350 be reordered if the use chain with this property does not have the
8351 same operation. One such example is s += a * b, where elements
8352 in a and b cannot be reordered. Here we check if the vector defined
8353 by STMT is only directly used in the reduction statement. */
8354 tree lhs = gimple_assign_lhs (stmt);
8355 use_operand_p dummy;
8356 gimple use_stmt;
8357 stmt_vec_info use_stmt_info = NULL;
8358 if (single_imm_use (lhs, &dummy, &use_stmt)
8359 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8360 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8361 return true;
8362 }
8363 c1 = VEC_WIDEN_MULT_LO_EXPR;
8364 c2 = VEC_WIDEN_MULT_HI_EXPR;
8365 break;
8366
8367 case VEC_WIDEN_MULT_EVEN_EXPR:
8368 /* Support the recursion induced just above. */
8369 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8370 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8371 break;
8372
8373 case WIDEN_LSHIFT_EXPR:
8374 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8375 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8376 break;
8377
8378 CASE_CONVERT:
8379 c1 = VEC_UNPACK_LO_EXPR;
8380 c2 = VEC_UNPACK_HI_EXPR;
8381 break;
8382
8383 case FLOAT_EXPR:
8384 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8385 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8386 break;
8387
8388 case FIX_TRUNC_EXPR:
8389 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8390 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8391 computing the operation. */
8392 return false;
8393
8394 default:
8395 gcc_unreachable ();
8396 }
8397
8398 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8399 std::swap (c1, c2);
8400
8401 if (code == FIX_TRUNC_EXPR)
8402 {
8403 /* The signedness is determined from output operand. */
8404 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8405 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8406 }
8407 else
8408 {
8409 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8410 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8411 }
8412
8413 if (!optab1 || !optab2)
8414 return false;
8415
8416 vec_mode = TYPE_MODE (vectype);
8417 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8418 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8419 return false;
8420
8421 *code1 = c1;
8422 *code2 = c2;
8423
8424 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8425 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8426 return true;
8427
8428 /* Check if it's a multi-step conversion that can be done using intermediate
8429 types. */
8430
8431 prev_type = vectype;
8432 prev_mode = vec_mode;
8433
8434 if (!CONVERT_EXPR_CODE_P (code))
8435 return false;
8436
8437 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8438 intermediate steps in the promotion sequence. We try
8439 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8440 not. */
8441 interm_types->create (MAX_INTERM_CVT_STEPS);
8442 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8443 {
8444 intermediate_mode = insn_data[icode1].operand[0].mode;
8445 intermediate_type
8446 = lang_hooks.types.type_for_mode (intermediate_mode,
8447 TYPE_UNSIGNED (prev_type));
8448 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8449 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8450
8451 if (!optab3 || !optab4
8452 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8453 || insn_data[icode1].operand[0].mode != intermediate_mode
8454 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8455 || insn_data[icode2].operand[0].mode != intermediate_mode
8456 || ((icode1 = optab_handler (optab3, intermediate_mode))
8457 == CODE_FOR_nothing)
8458 || ((icode2 = optab_handler (optab4, intermediate_mode))
8459 == CODE_FOR_nothing))
8460 break;
8461
8462 interm_types->quick_push (intermediate_type);
8463 (*multi_step_cvt)++;
8464
8465 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8466 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8467 return true;
8468
8469 prev_type = intermediate_type;
8470 prev_mode = intermediate_mode;
8471 }
8472
8473 interm_types->release ();
8474 return false;
8475 }
8476
8477
8478 /* Function supportable_narrowing_operation
8479
8480 Check whether an operation represented by the code CODE is a
8481 narrowing operation that is supported by the target platform in
8482 vector form (i.e., when operating on arguments of type VECTYPE_IN
8483 and producing a result of type VECTYPE_OUT).
8484
8485 Narrowing operations we currently support are NOP (CONVERT) and
8486 FIX_TRUNC. This function checks if these operations are supported by
8487 the target platform directly via vector tree-codes.
8488
8489 Output:
8490 - CODE1 is the code of a vector operation to be used when
8491 vectorizing the operation, if available.
8492 - MULTI_STEP_CVT determines the number of required intermediate steps in
8493 case of multi-step conversion (like int->short->char - in that case
8494 MULTI_STEP_CVT will be 1).
8495 - INTERM_TYPES contains the intermediate type required to perform the
8496 narrowing operation (short in the above example). */
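/* Illustrative sketch of a multi-step case: for an int -> char
   narrowing where the target only provides int -> short and
   short -> char pack optabs, the conversion goes through an
   intermediate short vector type; *MULTI_STEP_CVT is set to 1 and
   INTERM_TYPES holds that intermediate type.  */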
8497
8498 bool
8499 supportable_narrowing_operation (enum tree_code code,
8500 tree vectype_out, tree vectype_in,
8501 enum tree_code *code1, int *multi_step_cvt,
8502 vec<tree> *interm_types)
8503 {
8504 machine_mode vec_mode;
8505 enum insn_code icode1;
8506 optab optab1, interm_optab;
8507 tree vectype = vectype_in;
8508 tree narrow_vectype = vectype_out;
8509 enum tree_code c1;
8510 tree intermediate_type;
8511 machine_mode intermediate_mode, prev_mode;
8512 int i;
8513 bool uns;
8514
8515 *multi_step_cvt = 0;
8516 switch (code)
8517 {
8518 CASE_CONVERT:
8519 c1 = VEC_PACK_TRUNC_EXPR;
8520 break;
8521
8522 case FIX_TRUNC_EXPR:
8523 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8524 break;
8525
8526 case FLOAT_EXPR:
8527 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8528 tree code and optabs used for computing the operation. */
8529 return false;
8530
8531 default:
8532 gcc_unreachable ();
8533 }
8534
8535 if (code == FIX_TRUNC_EXPR)
8536 /* The signedness is determined from output operand. */
8537 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8538 else
8539 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8540
8541 if (!optab1)
8542 return false;
8543
8544 vec_mode = TYPE_MODE (vectype);
8545 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8546 return false;
8547
8548 *code1 = c1;
8549
8550 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8551 return true;
8552
8553 /* Check if it's a multi-step conversion that can be done using intermediate
8554 types. */
8555 prev_mode = vec_mode;
8556 if (code == FIX_TRUNC_EXPR)
8557 uns = TYPE_UNSIGNED (vectype_out);
8558 else
8559 uns = TYPE_UNSIGNED (vectype);
8560
8561 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8562 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8563 costly than signed. */
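  /* E.g. (an illustrative case): a double -> unsigned short conversion
     can do the vector FIX_TRUNC step as double -> signed int and then
     pack-truncate, because any value that fits in unsigned short also
     fits in signed int, so the final result is the same.  */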
8564 if (code == FIX_TRUNC_EXPR && uns)
8565 {
8566 enum insn_code icode2;
8567
8568 intermediate_type
8569 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8570 interm_optab
8571 = optab_for_tree_code (c1, intermediate_type, optab_default);
8572 if (interm_optab != unknown_optab
8573 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8574 && insn_data[icode1].operand[0].mode
8575 == insn_data[icode2].operand[0].mode)
8576 {
8577 uns = false;
8578 optab1 = interm_optab;
8579 icode1 = icode2;
8580 }
8581 }
8582
8583 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8584 intermediate steps in the narrowing sequence. We try
8585 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8586 interm_types->create (MAX_INTERM_CVT_STEPS);
8587 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8588 {
8589 intermediate_mode = insn_data[icode1].operand[0].mode;
8590 intermediate_type
8591 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8592 interm_optab
8593 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8594 optab_default);
8595 if (!interm_optab
8596 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8597 || insn_data[icode1].operand[0].mode != intermediate_mode
8598 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8599 == CODE_FOR_nothing))
8600 break;
8601
8602 interm_types->quick_push (intermediate_type);
8603 (*multi_step_cvt)++;
8604
8605 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8606 return true;
8607
8608 prev_mode = intermediate_mode;
8609 optab1 = interm_optab;
8610 }
8611
8612 interm_types->release ();
8613 return false;
8614 }