gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimplify.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "expr.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "tree-vectorizer.h"
66 #include "dumpfile.h"
67 #include "hash-map.h"
68 #include "plugin-api.h"
69 #include "ipa-ref.h"
70 #include "cgraph.h"
71 #include "builtins.h"
72
73 /* For lang_hooks.types.type_for_mode. */
74 #include "langhooks.h"
75
76 /* Return the vectorized type for the given statement. */
77
78 tree
79 stmt_vectype (struct _stmt_vec_info *stmt_info)
80 {
81 return STMT_VINFO_VECTYPE (stmt_info);
82 }
83
84 /* Return TRUE iff the given statement is in an inner loop relative to
85 the loop being vectorized. */
86 bool
87 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
88 {
89 gimple stmt = STMT_VINFO_STMT (stmt_info);
90 basic_block bb = gimple_bb (stmt);
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 struct loop* loop;
93
94 if (!loop_vinfo)
95 return false;
96
97 loop = LOOP_VINFO_LOOP (loop_vinfo);
98
99 return (bb->loop_father == loop->inner);
100 }
101
102 /* Record the cost of a statement, either by directly informing the
103 target model or by saving it in a vector for later processing.
104 Return a preliminary estimate of the statement's cost. */
105
106 unsigned
107 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
108 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
109 int misalign, enum vect_cost_model_location where)
110 {
111 if (body_cost_vec)
112 {
113 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
114 add_stmt_info_to_vec (body_cost_vec, count, kind,
115 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
116 misalign);
117 return (unsigned)
118 (builtin_vectorization_cost (kind, vectype, misalign) * count);
119
120 }
121 else
122 {
123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
124 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
125 void *target_cost_data;
126
127 if (loop_vinfo)
128 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
129 else
130 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
131
132 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
133 misalign, where);
134 }
135 }
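
/* For example, vect_model_simple_cost below accumulates prologue and body
   costs with calls of the form

     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                        stmt_info, 0, vect_prologue);
     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   Passing a NULL cost vector instead feeds the cost directly to the
   target cost model through add_stmt_cost.  (Illustrative summary of
   the callers found later in this file.)  */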
136
137 /* Return a variable of type ELEM_TYPE[NELEMS]. */
138
139 static tree
140 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
141 {
142 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
143 "vect_array");
144 }
145
146 /* ARRAY is an array of vectors created by create_vector_array.
147 Return an SSA_NAME for the vector in index N. The reference
148 is part of the vectorization of STMT and the vector is associated
149 with scalar destination SCALAR_DEST. */
150
151 static tree
152 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
153 tree array, unsigned HOST_WIDE_INT n)
154 {
155 tree vect_type, vect, vect_name, array_ref;
156 gimple new_stmt;
157
158 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
159 vect_type = TREE_TYPE (TREE_TYPE (array));
160 vect = vect_create_destination_var (scalar_dest, vect_type);
161 array_ref = build4 (ARRAY_REF, vect_type, array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (vect, array_ref);
166 vect_name = make_ssa_name (vect, new_stmt);
167 gimple_assign_set_lhs (new_stmt, vect_name);
168 vect_finish_stmt_generation (stmt, new_stmt, gsi);
169
170 return vect_name;
171 }
172
173 /* ARRAY is an array of vectors created by create_vector_array.
174 Emit code to store SSA_NAME VECT in index N of the array.
175 The store is part of the vectorization of STMT. */
176
177 static void
178 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
179 tree array, unsigned HOST_WIDE_INT n)
180 {
181 tree array_ref;
182 gimple new_stmt;
183
184 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
185 build_int_cst (size_type_node, n),
186 NULL_TREE, NULL_TREE);
187
188 new_stmt = gimple_build_assign (array_ref, vect);
189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
190 }
191
192 /* PTR is a pointer to an array of type TYPE. Return a representation
193 of *PTR. The memory reference replaces those in FIRST_DR
194 (and its group). */
195
196 static tree
197 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
198 {
199 tree mem_ref, alias_ptr_type;
200
201 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
202 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
203 /* Arrays have the same alignment as their type. */
204 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
205 return mem_ref;
206 }
207
208 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
209
210 /* Function vect_mark_relevant.
211
212 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
213
214 static void
215 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
216 enum vect_relevant relevant, bool live_p,
217 bool used_in_pattern)
218 {
219 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
220 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
221 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
222 gimple pattern_stmt;
223
224 if (dump_enabled_p ())
225 dump_printf_loc (MSG_NOTE, vect_location,
226 "mark relevant %d, live %d.\n", relevant, live_p);
227
228 /* If this stmt is an original stmt in a pattern, we might need to mark its
229 related pattern stmt instead of the original stmt. However, such stmts
230 may have their own uses that are not in any pattern; in such cases the
231 stmt itself should be marked. */
232 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
233 {
234 bool found = false;
235 if (!used_in_pattern)
236 {
237 imm_use_iterator imm_iter;
238 use_operand_p use_p;
239 gimple use_stmt;
240 tree lhs;
241 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
242 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
243
244 if (is_gimple_assign (stmt))
245 lhs = gimple_assign_lhs (stmt);
246 else
247 lhs = gimple_call_lhs (stmt);
248
249 /* This use is outside the pattern.  If LHS has other uses that are
250 pattern uses, we should mark the stmt itself, and not the pattern
251 stmt. */
252 if (lhs && TREE_CODE (lhs) == SSA_NAME)
253 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
254 {
255 if (is_gimple_debug (USE_STMT (use_p)))
256 continue;
257 use_stmt = USE_STMT (use_p);
258
259 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
260 continue;
261
262 if (vinfo_for_stmt (use_stmt)
263 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
264 {
265 found = true;
266 break;
267 }
268 }
269 }
270
271 if (!found)
272 {
273 /* This is the last stmt in a sequence that was detected as a
274 pattern that can potentially be vectorized. Don't mark the stmt
275 as relevant/live because it's not going to be vectorized.
276 Instead mark the pattern-stmt that replaces it. */
277
278 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
279
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "last stmt in pattern. don't mark"
283 " relevant/live.\n");
284 stmt_info = vinfo_for_stmt (pattern_stmt);
285 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
286 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
287 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
288 stmt = pattern_stmt;
289 }
290 }
291
292 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
293 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
294 STMT_VINFO_RELEVANT (stmt_info) = relevant;
295
296 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
297 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
298 {
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "already marked relevant/live.\n");
302 return;
303 }
304
305 worklist->safe_push (stmt);
306 }
307
308
309 /* Function vect_stmt_relevant_p.
310
311 Return true if STMT in loop that is represented by LOOP_VINFO is
312 "relevant for vectorization".
313
314 A stmt is considered "relevant for vectorization" if:
315 - it has uses outside the loop.
316 - it has vdefs (it alters memory).
317 - it is a control stmt in the loop (except for the exit condition).
318
319 CHECKME: what other side effects would the vectorizer allow? */
320
321 static bool
322 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
323 enum vect_relevant *relevant, bool *live_p)
324 {
325 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
326 ssa_op_iter op_iter;
327 imm_use_iterator imm_iter;
328 use_operand_p use_p;
329 def_operand_p def_p;
330
331 *relevant = vect_unused_in_scope;
332 *live_p = false;
333
334 /* cond stmt other than loop exit cond. */
335 if (is_ctrl_stmt (stmt)
336 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
337 != loop_exit_ctrl_vec_info_type)
338 *relevant = vect_used_in_scope;
339
340 /* changing memory. */
341 if (gimple_code (stmt) != GIMPLE_PHI)
342 if (gimple_vdef (stmt))
343 {
344 if (dump_enabled_p ())
345 dump_printf_loc (MSG_NOTE, vect_location,
346 "vec_stmt_relevant_p: stmt has vdefs.\n");
347 *relevant = vect_used_in_scope;
348 }
349
350 /* uses outside the loop. */
351 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
352 {
353 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
354 {
355 basic_block bb = gimple_bb (USE_STMT (use_p));
356 if (!flow_bb_inside_loop_p (loop, bb))
357 {
358 if (dump_enabled_p ())
359 dump_printf_loc (MSG_NOTE, vect_location,
360 "vec_stmt_relevant_p: used out of loop.\n");
361
362 if (is_gimple_debug (USE_STMT (use_p)))
363 continue;
364
365 /* We expect all such uses to be in the loop exit phis
366 (because of loop closed form) */
367 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
368 gcc_assert (bb == single_exit (loop)->dest);
369
370 *live_p = true;
371 }
372 }
373 }
374
375 return (*live_p || *relevant);
376 }
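
/* An illustrative sketch (not from any particular testcase): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <-- has a vdef, so *relevant is set to
                                 vect_used_in_scope
         s = s + b[i];       <-- s is used after the loop only through the
                                 loop-closed exit PHI, so *live_p is set
       }

   the exit condition itself is classified as loop_exit_ctrl_vec_info_type
   and is therefore not marked relevant here.  */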
377
378
379 /* Function exist_non_indexing_operands_for_use_p
380
381 USE is one of the uses attached to STMT. Check if USE is
382 used in STMT for anything other than indexing an array. */
383
384 static bool
385 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
386 {
387 tree operand;
388 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
389
390 /* USE corresponds to some operand in STMT. If there is no data
391 reference in STMT, then any operand that corresponds to USE
392 is not indexing an array. */
393 if (!STMT_VINFO_DATA_REF (stmt_info))
394 return true;
395
396 /* STMT has a data_ref. FORNOW this means that it's of one of
397 the following forms:
398 -1- ARRAY_REF = var
399 -2- var = ARRAY_REF
400 (This should have been verified in analyze_data_refs).
401
402 'var' in the second case corresponds to a def, not a use,
403 so USE cannot correspond to any operands that are not used
404 for array indexing.
405
406 Therefore, all we need to check is if STMT falls into the
407 first case, and whether var corresponds to USE. */
408
409 if (!gimple_assign_copy_p (stmt))
410 {
411 if (is_gimple_call (stmt)
412 && gimple_call_internal_p (stmt))
413 switch (gimple_call_internal_fn (stmt))
414 {
415 case IFN_MASK_STORE:
416 operand = gimple_call_arg (stmt, 3);
417 if (operand == use)
418 return true;
419 /* FALLTHRU */
420 case IFN_MASK_LOAD:
421 operand = gimple_call_arg (stmt, 2);
422 if (operand == use)
423 return true;
424 break;
425 default:
426 break;
427 }
428 return false;
429 }
430
431 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
432 return false;
433 operand = gimple_assign_rhs1 (stmt);
434 if (TREE_CODE (operand) != SSA_NAME)
435 return false;
436
437 if (operand == use)
438 return true;
439
440 return false;
441 }
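
/* For instance, given a store of the first form above, a[i_1] = x_2,
   the use x_2 makes this function return true (it is the stored value),
   whereas the index i_1 is only used for array indexing and makes it
   return false.  (Illustrative reading of the cases handled above.)  */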
442
443
444 /*
445 Function process_use.
446
447 Inputs:
448 - a USE in STMT in a loop represented by LOOP_VINFO
449 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
450 that defined USE. This is done by calling mark_relevant and passing it
451 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
452 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
453 be performed.
454
455 Outputs:
456 Generally, LIVE_P and RELEVANT are used to define the liveness and
457 relevance info of the DEF_STMT of this USE:
458 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
459 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
460 Exceptions:
461 - case 1: If USE is used only for address computations (e.g. array indexing),
462 which does not need to be directly vectorized, then the liveness/relevance
463 of the respective DEF_STMT is left unchanged.
464 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
465 skip DEF_STMT because it has already been processed.
466 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
467 be modified accordingly.
468
469 Return true if everything is as expected. Return false otherwise. */
470
471 static bool
472 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
473 enum vect_relevant relevant, vec<gimple> *worklist,
474 bool force)
475 {
476 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
477 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
478 stmt_vec_info dstmt_vinfo;
479 basic_block bb, def_bb;
480 tree def;
481 gimple def_stmt;
482 enum vect_def_type dt;
483
484 /* case 1: we are only interested in uses that need to be vectorized. Uses
485 that are used for address computation are not considered relevant. */
486 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
487 return true;
488
489 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
490 {
491 if (dump_enabled_p ())
492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
493 "not vectorized: unsupported use in stmt.\n");
494 return false;
495 }
496
497 if (!def_stmt || gimple_nop_p (def_stmt))
498 return true;
499
500 def_bb = gimple_bb (def_stmt);
501 if (!flow_bb_inside_loop_p (loop, def_bb))
502 {
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
505 return true;
506 }
507
508 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
509 DEF_STMT must have already been processed, because this should be the
510 only way that STMT, which is a reduction-phi, was put in the worklist,
511 as there should be no other uses for DEF_STMT in the loop. So we just
512 check that everything is as expected, and we are done. */
513 dstmt_vinfo = vinfo_for_stmt (def_stmt);
514 bb = gimple_bb (stmt);
515 if (gimple_code (stmt) == GIMPLE_PHI
516 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
517 && gimple_code (def_stmt) != GIMPLE_PHI
518 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
519 && bb->loop_father == def_bb->loop_father)
520 {
521 if (dump_enabled_p ())
522 dump_printf_loc (MSG_NOTE, vect_location,
523 "reduc-stmt defining reduc-phi in the same nest.\n");
524 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
525 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
526 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
527 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
528 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
529 return true;
530 }
531
532 /* case 3a: outer-loop stmt defining an inner-loop stmt:
533 outer-loop-header-bb:
534 d = def_stmt
535 inner-loop:
536 stmt # use (d)
537 outer-loop-tail-bb:
538 ... */
539 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
540 {
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "outer-loop def-stmt defining inner-loop stmt.\n");
544
545 switch (relevant)
546 {
547 case vect_unused_in_scope:
548 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
549 vect_used_in_scope : vect_unused_in_scope;
550 break;
551
552 case vect_used_in_outer_by_reduction:
553 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
554 relevant = vect_used_by_reduction;
555 break;
556
557 case vect_used_in_outer:
558 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
559 relevant = vect_used_in_scope;
560 break;
561
562 case vect_used_in_scope:
563 break;
564
565 default:
566 gcc_unreachable ();
567 }
568 }
569
570 /* case 3b: inner-loop stmt defining an outer-loop stmt:
571 outer-loop-header-bb:
572 ...
573 inner-loop:
574 d = def_stmt
575 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
576 stmt # use (d) */
577 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "inner-loop def-stmt defining outer-loop stmt.\n");
582
583 switch (relevant)
584 {
585 case vect_unused_in_scope:
586 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
587 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
588 vect_used_in_outer_by_reduction : vect_unused_in_scope;
589 break;
590
591 case vect_used_by_reduction:
592 relevant = vect_used_in_outer_by_reduction;
593 break;
594
595 case vect_used_in_scope:
596 relevant = vect_used_in_outer;
597 break;
598
599 default:
600 gcc_unreachable ();
601 }
602 }
603
604 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
605 is_pattern_stmt_p (stmt_vinfo));
606 return true;
607 }
608
609
610 /* Function vect_mark_stmts_to_be_vectorized.
611
612 Not all stmts in the loop need to be vectorized. For example:
613
614 for i...
615 for j...
616 1. T0 = i + j
617 2. T1 = a[T0]
618
619 3. j = j + 1
620
621 Stmt 1 and 3 do not need to be vectorized, because loop control and
622 addressing of vectorized data-refs are handled differently.
623
624 This pass detects such stmts. */
625
626 bool
627 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
628 {
629 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
630 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
631 unsigned int nbbs = loop->num_nodes;
632 gimple_stmt_iterator si;
633 gimple stmt;
634 unsigned int i;
635 stmt_vec_info stmt_vinfo;
636 basic_block bb;
637 gimple phi;
638 bool live_p;
639 enum vect_relevant relevant, tmp_relevant;
640 enum vect_def_type def_type;
641
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "=== vect_mark_stmts_to_be_vectorized ===\n");
645
646 auto_vec<gimple, 64> worklist;
647
648 /* 1. Init worklist. */
649 for (i = 0; i < nbbs; i++)
650 {
651 bb = bbs[i];
652 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
653 {
654 phi = gsi_stmt (si);
655 if (dump_enabled_p ())
656 {
657 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
659 dump_printf (MSG_NOTE, "\n");
660 }
661
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
664 }
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
666 {
667 stmt = gsi_stmt (si);
668 if (dump_enabled_p ())
669 {
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
672 dump_printf (MSG_NOTE, "\n");
673 }
674
675 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
676 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
677 }
678 }
679
680 /* 2. Process_worklist */
681 while (worklist.length () > 0)
682 {
683 use_operand_p use_p;
684 ssa_op_iter iter;
685
686 stmt = worklist.pop ();
687 if (dump_enabled_p ())
688 {
689 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
691 dump_printf (MSG_NOTE, "\n");
692 }
693
694 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
695 (DEF_STMT) as relevant/irrelevant and live/dead according to the
696 liveness and relevance properties of STMT. */
697 stmt_vinfo = vinfo_for_stmt (stmt);
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
699 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
700
701 /* Generally, the liveness and relevance properties of STMT are
702 propagated as is to the DEF_STMTs of its USEs:
703 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
704 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
705
706 One exception is when STMT has been identified as defining a reduction
707 variable; in this case we set the liveness/relevance as follows:
708 live_p = false
709 relevant = vect_used_by_reduction
710 This is because we distinguish between two kinds of relevant stmts -
711 those that are used by a reduction computation, and those that are
712 (also) used by a regular computation. This allows us later on to
713 identify stmts that are used solely by a reduction, and therefore the
714 order of the results that they produce does not have to be kept. */
715
716 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
717 tmp_relevant = relevant;
718 switch (def_type)
719 {
720 case vect_reduction_def:
721 switch (tmp_relevant)
722 {
723 case vect_unused_in_scope:
724 relevant = vect_used_by_reduction;
725 break;
726
727 case vect_used_by_reduction:
728 if (gimple_code (stmt) == GIMPLE_PHI)
729 break;
730 /* fall through */
731
732 default:
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of reduction.\n");
736 return false;
737 }
738
739 live_p = false;
740 break;
741
742 case vect_nested_cycle:
743 if (tmp_relevant != vect_unused_in_scope
744 && tmp_relevant != vect_used_in_outer_by_reduction
745 && tmp_relevant != vect_used_in_outer)
746 {
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
749 "unsupported use of nested cycle.\n");
750
751 return false;
752 }
753
754 live_p = false;
755 break;
756
757 case vect_double_reduction_def:
758 if (tmp_relevant != vect_unused_in_scope
759 && tmp_relevant != vect_used_by_reduction)
760 {
761 if (dump_enabled_p ())
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
763 "unsupported use of double reduction.\n");
764
765 return false;
766 }
767
768 live_p = false;
769 break;
770
771 default:
772 break;
773 }
774
775 if (is_pattern_stmt_p (stmt_vinfo))
776 {
777 /* Pattern statements are not inserted into the code, so
778 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
779 have to scan the RHS or function arguments instead. */
780 if (is_gimple_assign (stmt))
781 {
782 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
783 tree op = gimple_assign_rhs1 (stmt);
784
785 i = 1;
786 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
787 {
788 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
789 live_p, relevant, &worklist, false)
790 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
791 live_p, relevant, &worklist, false))
792 return false;
793 i = 2;
794 }
795 for (; i < gimple_num_ops (stmt); i++)
796 {
797 op = gimple_op (stmt, i);
798 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
799 &worklist, false))
800 return false;
801 }
802 }
803 else if (is_gimple_call (stmt))
804 {
805 for (i = 0; i < gimple_call_num_args (stmt); i++)
806 {
807 tree arg = gimple_call_arg (stmt, i);
808 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
809 &worklist, false))
810 return false;
811 }
812 }
813 }
814 else
815 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
816 {
817 tree op = USE_FROM_PTR (use_p);
818 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
819 &worklist, false))
820 return false;
821 }
822
823 if (STMT_VINFO_GATHER_P (stmt_vinfo))
824 {
825 tree off;
826 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
827 gcc_assert (decl);
828 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
829 &worklist, true))
830 return false;
831 }
832 } /* while worklist */
833
834 return true;
835 }
836
837
838 /* Function vect_model_simple_cost.
839
840 Models cost for simple operations, i.e. those that only emit ncopies of a
841 single op. Right now, this does not account for multiple insns that could
842 be generated for the single vector op. We will handle that shortly. */
843
844 void
845 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
846 enum vect_def_type *dt,
847 stmt_vector_for_cost *prologue_cost_vec,
848 stmt_vector_for_cost *body_cost_vec)
849 {
850 int i;
851 int inside_cost = 0, prologue_cost = 0;
852
853 /* The SLP costs were already calculated during SLP tree build. */
854 if (PURE_SLP_STMT (stmt_info))
855 return;
856
857 /* FORNOW: Assuming maximum 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
862
863 /* Pass the inside-of-loop statements to the target-specific cost model. */
864 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
865 stmt_info, 0, vect_body);
866
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_simple_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
871 }
872
873
874 /* Model cost for type demotion and promotion operations. PWR is normally
875 zero for single-step promotions and demotions. It will be one if
876 two-step promotion/demotion is required, and so on. Each additional
877 step doubles the number of instructions required. */
878
879 static void
880 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
881 enum vect_def_type *dt, int pwr)
882 {
883 int i, tmp;
884 int inside_cost = 0, prologue_cost = 0;
885 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
886 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
887 void *target_cost_data;
888
889 /* The SLP costs were already calculated during SLP tree build. */
890 if (PURE_SLP_STMT (stmt_info))
891 return;
892
893 if (loop_vinfo)
894 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
895 else
896 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
897
898 for (i = 0; i < pwr + 1; i++)
899 {
900 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
901 (i + 1) : i;
902 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
903 vec_promote_demote, stmt_info, 0,
904 vect_body);
905 }
906
907 /* FORNOW: Assuming maximum 2 args per stmt. */
908 for (i = 0; i < 2; i++)
909 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
910 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
911 stmt_info, 0, vect_prologue);
912
913 if (dump_enabled_p ())
914 dump_printf_loc (MSG_NOTE, vect_location,
915 "vect_model_promotion_demotion_cost: inside_cost = %d, "
916 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 }
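
/* A worked instance of the loop above (sketch, assuming vect_pow2 (n)
   returns 2**n): a two-step promotion (PWR == 1) charges
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 vec_promote_demote stmts to the
   loop body, while a two-step demotion charges
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2.  */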
918
919 /* Function vect_cost_group_size
920
921 For grouped load or store, return the group_size only if it is the first
922 load or store of a group, else return 1. This ensures that group size is
923 only returned once per group. */
924
925 static int
926 vect_cost_group_size (stmt_vec_info stmt_info)
927 {
928 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
929
930 if (first_stmt == STMT_VINFO_STMT (stmt_info))
931 return GROUP_SIZE (stmt_info);
932
933 return 1;
934 }
935
936
937 /* Function vect_model_store_cost
938
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
941
942 void
943 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
944 bool store_lanes_p, enum vect_def_type dt,
945 slp_tree slp_node,
946 stmt_vector_for_cost *prologue_cost_vec,
947 stmt_vector_for_cost *body_cost_vec)
948 {
949 int group_size;
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 struct data_reference *first_dr;
952 gimple first_stmt;
953
954 /* The SLP costs were already calculated during SLP tree build. */
955 if (PURE_SLP_STMT (stmt_info))
956 return;
957
958 if (dt == vect_constant_def || dt == vect_external_def)
959 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
960 stmt_info, 0, vect_prologue);
961
962 /* Grouped access? */
963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
964 {
965 if (slp_node)
966 {
967 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
968 group_size = 1;
969 }
970 else
971 {
972 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
973 group_size = vect_cost_group_size (stmt_info);
974 }
975
976 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
977 }
978 /* Not a grouped access. */
979 else
980 {
981 group_size = 1;
982 first_dr = STMT_VINFO_DATA_REF (stmt_info);
983 }
984
985 /* We assume that the cost of a single store-lanes instruction is
986 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
987 access is instead being provided by a permute-and-store operation,
988 include the cost of the permutes. */
989 if (!store_lanes_p && group_size > 1)
990 {
991 /* Uses high and low interleave or shuffle operations for each
992 needed permute. */
993 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
994 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
995 stmt_info, 0, vect_body);
996
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: strided group_size = %d .\n",
1000 group_size);
1001 }
1002
1003 /* Costs of the stores. */
1004 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1005
1006 if (dump_enabled_p ())
1007 dump_printf_loc (MSG_NOTE, vect_location,
1008 "vect_model_store_cost: inside_cost = %d, "
1009 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1010 }
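
/* For example, on the permute-and-store path above, ncopies == 1 and
   group_size == 4 give nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts
   charged to the loop body, in addition to the stores themselves.
   (Illustrative instance of the formula above.)  */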
1011
1012
1013 /* Calculate cost of DR's memory access. */
1014 void
1015 vect_get_store_cost (struct data_reference *dr, int ncopies,
1016 unsigned int *inside_cost,
1017 stmt_vector_for_cost *body_cost_vec)
1018 {
1019 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1020 gimple stmt = DR_STMT (dr);
1021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1022
1023 switch (alignment_support_scheme)
1024 {
1025 case dr_aligned:
1026 {
1027 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1028 vector_store, stmt_info, 0,
1029 vect_body);
1030
1031 if (dump_enabled_p ())
1032 dump_printf_loc (MSG_NOTE, vect_location,
1033 "vect_model_store_cost: aligned.\n");
1034 break;
1035 }
1036
1037 case dr_unaligned_supported:
1038 {
1039 /* Here, we assign an additional cost for the unaligned store. */
1040 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1041 unaligned_store, stmt_info,
1042 DR_MISALIGNMENT (dr), vect_body);
1043 if (dump_enabled_p ())
1044 dump_printf_loc (MSG_NOTE, vect_location,
1045 "vect_model_store_cost: unaligned supported by "
1046 "hardware.\n");
1047 break;
1048 }
1049
1050 case dr_unaligned_unsupported:
1051 {
1052 *inside_cost = VECT_MAX_COST;
1053
1054 if (dump_enabled_p ())
1055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1056 "vect_model_store_cost: unsupported access.\n");
1057 break;
1058 }
1059
1060 default:
1061 gcc_unreachable ();
1062 }
1063 }
1064
1065
1066 /* Function vect_model_load_cost
1067
1068 Models cost for loads. In the case of grouped accesses, the last access
1069 has the overhead of the grouped access attributed to it. Since unaligned
1070 accesses are supported for loads, we also account for the costs of the
1071 access scheme chosen. */
1072
1073 void
1074 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1075 bool load_lanes_p, slp_tree slp_node,
1076 stmt_vector_for_cost *prologue_cost_vec,
1077 stmt_vector_for_cost *body_cost_vec)
1078 {
1079 int group_size;
1080 gimple first_stmt;
1081 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1082 unsigned int inside_cost = 0, prologue_cost = 0;
1083
1084 /* The SLP costs were already calculated during SLP tree build. */
1085 if (PURE_SLP_STMT (stmt_info))
1086 return;
1087
1088 /* Grouped accesses? */
1089 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1090 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1091 {
1092 group_size = vect_cost_group_size (stmt_info);
1093 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1094 }
1095 /* Not a grouped access. */
1096 else
1097 {
1098 group_size = 1;
1099 first_dr = dr;
1100 }
1101
1102 /* We assume that the cost of a single load-lanes instruction is
1103 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1104 access is instead being provided by a load-and-permute operation,
1105 include the cost of the permutes. */
1106 if (!load_lanes_p && group_size > 1)
1107 {
1108 /* Uses even and odd extract operations or shuffle operations
1109 for each needed permute. */
1110 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1111 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1112 stmt_info, 0, vect_body);
1113
1114 if (dump_enabled_p ())
1115 dump_printf_loc (MSG_NOTE, vect_location,
1116 "vect_model_load_cost: strided group_size = %d .\n",
1117 group_size);
1118 }
1119
1120 /* The loads themselves. */
1121 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1122 {
1123 /* N scalar loads plus gathering them into a vector. */
1124 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1125 inside_cost += record_stmt_cost (body_cost_vec,
1126 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1127 scalar_load, stmt_info, 0, vect_body);
1128 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1129 stmt_info, 0, vect_body);
1130 }
1131 else
1132 vect_get_load_cost (first_dr, ncopies,
1133 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1134 || group_size > 1 || slp_node),
1135 &inside_cost, &prologue_cost,
1136 prologue_cost_vec, body_cost_vec, true);
1137
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE, vect_location,
1140 "vect_model_load_cost: inside_cost = %d, "
1141 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1142 }
1143
1144
1145 /* Calculate cost of DR's memory access. */
1146 void
1147 vect_get_load_cost (struct data_reference *dr, int ncopies,
1148 bool add_realign_cost, unsigned int *inside_cost,
1149 unsigned int *prologue_cost,
1150 stmt_vector_for_cost *prologue_cost_vec,
1151 stmt_vector_for_cost *body_cost_vec,
1152 bool record_prologue_costs)
1153 {
1154 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1155 gimple stmt = DR_STMT (dr);
1156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1157
1158 switch (alignment_support_scheme)
1159 {
1160 case dr_aligned:
1161 {
1162 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1163 stmt_info, 0, vect_body);
1164
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "vect_model_load_cost: aligned.\n");
1168
1169 break;
1170 }
1171 case dr_unaligned_supported:
1172 {
1173 /* Here, we assign an additional cost for the unaligned load. */
1174 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1175 unaligned_load, stmt_info,
1176 DR_MISALIGNMENT (dr), vect_body);
1177
1178 if (dump_enabled_p ())
1179 dump_printf_loc (MSG_NOTE, vect_location,
1180 "vect_model_load_cost: unaligned supported by "
1181 "hardware.\n");
1182
1183 break;
1184 }
1185 case dr_explicit_realign:
1186 {
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1188 vector_load, stmt_info, 0, vect_body);
1189 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1190 vec_perm, stmt_info, 0, vect_body);
1191
1192 /* FIXME: If the misalignment remains fixed across the iterations of
1193 the containing loop, the following cost should be added to the
1194 prologue costs. */
1195 if (targetm.vectorize.builtin_mask_for_load)
1196 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1197 stmt_info, 0, vect_body);
1198
1199 if (dump_enabled_p ())
1200 dump_printf_loc (MSG_NOTE, vect_location,
1201 "vect_model_load_cost: explicit realign\n");
1202
1203 break;
1204 }
1205 case dr_explicit_realign_optimized:
1206 {
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_NOTE, vect_location,
1209 "vect_model_load_cost: unaligned software "
1210 "pipelined.\n");
1211
1212 /* Unaligned software pipeline has a load of an address, an initial
1213 load, and possibly a mask operation to "prime" the loop. However,
1214 if this is an access in a group of loads, which provide grouped
1215 access, then the above cost should only be considered for one
1216 access in the group. Inside the loop, there is a load op
1217 and a realignment op. */
1218
1219 if (add_realign_cost && record_prologue_costs)
1220 {
1221 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1222 vector_stmt, stmt_info,
1223 0, vect_prologue);
1224 if (targetm.vectorize.builtin_mask_for_load)
1225 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1226 vector_stmt, stmt_info,
1227 0, vect_prologue);
1228 }
1229
1230 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1231 stmt_info, 0, vect_body);
1232 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1233 stmt_info, 0, vect_body);
1234
1235 if (dump_enabled_p ())
1236 dump_printf_loc (MSG_NOTE, vect_location,
1237 "vect_model_load_cost: explicit realign optimized"
1238 "\n");
1239
1240 break;
1241 }
1242
1243 case dr_unaligned_unsupported:
1244 {
1245 *inside_cost = VECT_MAX_COST;
1246
1247 if (dump_enabled_p ())
1248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1249 "vect_model_load_cost: unsupported access.\n");
1250 break;
1251 }
1252
1253 default:
1254 gcc_unreachable ();
1255 }
1256 }
1257
1258 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1259 the loop preheader for the vectorized stmt STMT. */
1260
1261 static void
1262 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1263 {
1264 if (gsi)
1265 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1266 else
1267 {
1268 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1269 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1270
1271 if (loop_vinfo)
1272 {
1273 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1274 basic_block new_bb;
1275 edge pe;
1276
1277 if (nested_in_vect_loop_p (loop, stmt))
1278 loop = loop->inner;
1279
1280 pe = loop_preheader_edge (loop);
1281 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1282 gcc_assert (!new_bb);
1283 }
1284 else
1285 {
1286 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1287 basic_block bb;
1288 gimple_stmt_iterator gsi_bb_start;
1289
1290 gcc_assert (bb_vinfo);
1291 bb = BB_VINFO_BB (bb_vinfo);
1292 gsi_bb_start = gsi_after_labels (bb);
1293 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1294 }
1295 }
1296
1297 if (dump_enabled_p ())
1298 {
1299 dump_printf_loc (MSG_NOTE, vect_location,
1300 "created new init_stmt: ");
1301 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1302 dump_printf (MSG_NOTE, "\n");
1303 }
1304 }
1305
1306 /* Function vect_init_vector.
1307
1308 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1309 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1310 vector type, a vector with all elements equal to VAL is created first.
1311 Place the initialization at GSI if it is not NULL. Otherwise, place the
1312 initialization at the loop preheader.
1313 Return the DEF of INIT_STMT.
1314 It will be used in the vectorization of STMT. */
1315
1316 tree
1317 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1318 {
1319 tree new_var;
1320 gimple init_stmt;
1321 tree vec_oprnd;
1322 tree new_temp;
1323
1324 if (TREE_CODE (type) == VECTOR_TYPE
1325 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1326 {
1327 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1328 {
1329 if (CONSTANT_CLASS_P (val))
1330 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1331 else
1332 {
1333 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1334 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1335 new_temp, val,
1336 NULL_TREE);
1337 vect_init_vector_1 (stmt, init_stmt, gsi);
1338 val = new_temp;
1339 }
1340 }
1341 val = build_vector_from_val (type, val);
1342 }
1343
1344 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1345 init_stmt = gimple_build_assign (new_var, val);
1346 new_temp = make_ssa_name (new_var, init_stmt);
1347 gimple_assign_set_lhs (init_stmt, new_temp);
1348 vect_init_vector_1 (stmt, init_stmt, gsi);
1349 vec_oprnd = gimple_assign_lhs (init_stmt);
1350 return vec_oprnd;
1351 }
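
/* For example, vect_get_vec_def_for_operand below materializes a constant
   operand OP as the splat {op, op, ..., op} with

     return vect_init_vector (stmt, op, vector_type, NULL);

   where the NULL GSI places the initialization on the loop preheader edge
   (or after the labels of the block for basic-block vectorization).  */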
1352
1353
1354 /* Function vect_get_vec_def_for_operand.
1355
1356 OP is an operand in STMT. This function returns a (vector) def that will be
1357 used in the vectorized stmt for STMT.
1358
1359 In the case that OP is an SSA_NAME which is defined in the loop, then
1360 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1361
1362 In case OP is an invariant or constant, a new stmt that creates a vector def
1363 needs to be introduced. */
1364
1365 tree
1366 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1367 {
1368 tree vec_oprnd;
1369 gimple vec_stmt;
1370 gimple def_stmt;
1371 stmt_vec_info def_stmt_info = NULL;
1372 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1373 unsigned int nunits;
1374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1375 tree def;
1376 enum vect_def_type dt;
1377 bool is_simple_use;
1378 tree vector_type;
1379
1380 if (dump_enabled_p ())
1381 {
1382 dump_printf_loc (MSG_NOTE, vect_location,
1383 "vect_get_vec_def_for_operand: ");
1384 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1385 dump_printf (MSG_NOTE, "\n");
1386 }
1387
1388 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1389 &def_stmt, &def, &dt);
1390 gcc_assert (is_simple_use);
1391 if (dump_enabled_p ())
1392 {
1393 int loc_printed = 0;
1394 if (def)
1395 {
1396 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1397 loc_printed = 1;
1398 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1399 dump_printf (MSG_NOTE, "\n");
1400 }
1401 if (def_stmt)
1402 {
1403 if (loc_printed)
1404 dump_printf (MSG_NOTE, " def_stmt = ");
1405 else
1406 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1407 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1408 dump_printf (MSG_NOTE, "\n");
1409 }
1410 }
1411
1412 switch (dt)
1413 {
1414 /* Case 1: operand is a constant. */
1415 case vect_constant_def:
1416 {
1417 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1418 gcc_assert (vector_type);
1419 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1420
1421 if (scalar_def)
1422 *scalar_def = op;
1423
1424 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1425 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "Create vector_cst. nunits = %d\n", nunits);
1428
1429 return vect_init_vector (stmt, op, vector_type, NULL);
1430 }
1431
1432 /* Case 2: operand is defined outside the loop - loop invariant. */
1433 case vect_external_def:
1434 {
1435 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1436 gcc_assert (vector_type);
1437
1438 if (scalar_def)
1439 *scalar_def = def;
1440
1441 /* Create 'vec_inv = {inv,inv,..,inv}' */
1442 if (dump_enabled_p ())
1443 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1444
1445 return vect_init_vector (stmt, def, vector_type, NULL);
1446 }
1447
1448 /* Case 3: operand is defined inside the loop. */
1449 case vect_internal_def:
1450 {
1451 if (scalar_def)
1452 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1453
1454 /* Get the def from the vectorized stmt. */
1455 def_stmt_info = vinfo_for_stmt (def_stmt);
1456
1457 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1458 /* Get vectorized pattern statement. */
1459 if (!vec_stmt
1460 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1461 && !STMT_VINFO_RELEVANT (def_stmt_info))
1462 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1463 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1464 gcc_assert (vec_stmt);
1465 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1466 vec_oprnd = PHI_RESULT (vec_stmt);
1467 else if (is_gimple_call (vec_stmt))
1468 vec_oprnd = gimple_call_lhs (vec_stmt);
1469 else
1470 vec_oprnd = gimple_assign_lhs (vec_stmt);
1471 return vec_oprnd;
1472 }
1473
1474 /* Case 4: operand is defined by a loop header phi - reduction */
1475 case vect_reduction_def:
1476 case vect_double_reduction_def:
1477 case vect_nested_cycle:
1478 {
1479 struct loop *loop;
1480
1481 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1482 loop = (gimple_bb (def_stmt))->loop_father;
1483
1484 /* Get the def before the loop */
1485 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1486 return get_initial_def_for_reduction (stmt, op, scalar_def);
1487 }
1488
1489 /* Case 5: operand is defined by loop-header phi - induction. */
1490 case vect_induction_def:
1491 {
1492 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1493
1494 /* Get the def from the vectorized stmt. */
1495 def_stmt_info = vinfo_for_stmt (def_stmt);
1496 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1497 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1498 vec_oprnd = PHI_RESULT (vec_stmt);
1499 else
1500 vec_oprnd = gimple_get_lhs (vec_stmt);
1501 return vec_oprnd;
1502 }
1503
1504 default:
1505 gcc_unreachable ();
1506 }
1507 }
1508
1509
1510 /* Function vect_get_vec_def_for_stmt_copy
1511
1512 Return a vector-def for an operand. This function is used when the
1513 vectorized stmt to be created (by the caller to this function) is a "copy"
1514 created in case the vectorized result cannot fit in one vector, and several
1515 copies of the vector-stmt are required. In this case the vector-def is
1516 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1517 of the stmt that defines VEC_OPRND.
1518 DT is the type of the vector def VEC_OPRND.
1519
1520 Context:
1521 In case the vectorization factor (VF) is bigger than the number
1522 of elements that can fit in a vectype (nunits), we have to generate
1523 more than one vector stmt to vectorize the scalar stmt. This situation
1524 arises when there are multiple data-types operated upon in the loop; the
1525 smallest data-type determines the VF, and as a result, when vectorizing
1526 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1527 vector stmt (each computing a vector of 'nunits' results, and together
1528 computing 'VF' results in each iteration). This function is called when
1529 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1530 which VF=16 and nunits=4, so the number of copies required is 4):
1531
1532 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1533
1534 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1535 VS1.1: vx.1 = memref1 VS1.2
1536 VS1.2: vx.2 = memref2 VS1.3
1537 VS1.3: vx.3 = memref3
1538
1539 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1540 VSnew.1: vz1 = vx.1 + ... VSnew.2
1541 VSnew.2: vz2 = vx.2 + ... VSnew.3
1542 VSnew.3: vz3 = vx.3 + ...
1543
1544 The vectorization of S1 is explained in vectorizable_load.
1545 The vectorization of S2:
1546 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1547 the function 'vect_get_vec_def_for_operand' is called to
1548 get the relevant vector-def for each operand of S2. For operand x it
1549 returns the vector-def 'vx.0'.
1550
1551 To create the remaining copies of the vector-stmt (VSnew.j), this
1552 function is called to get the relevant vector-def for each operand. It is
1553 obtained from the respective VS1.j stmt, which is recorded in the
1554 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1555
1556 For example, to obtain the vector-def 'vx.1' in order to create the
1557 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1558 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1559 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1560 and return its def ('vx.1').
1561 Overall, to create the above sequence this function will be called 3 times:
1562 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1563 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1564 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1565
1566 tree
1567 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1568 {
1569 gimple vec_stmt_for_operand;
1570 stmt_vec_info def_stmt_info;
1571
1572 /* Do nothing; can reuse same def. */
1573 if (dt == vect_external_def || dt == vect_constant_def )
1574 return vec_oprnd;
1575
1576 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1577 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1578 gcc_assert (def_stmt_info);
1579 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1580 gcc_assert (vec_stmt_for_operand);
1581 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1582 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1583 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1584 else
1585 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1586 return vec_oprnd;
1587 }
1588
1589
1590 /* Get vectorized definitions for the operands to create a copy of an original
1591 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1592
1593 static void
1594 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1595 vec<tree> *vec_oprnds0,
1596 vec<tree> *vec_oprnds1)
1597 {
1598 tree vec_oprnd = vec_oprnds0->pop ();
1599
1600 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1601 vec_oprnds0->quick_push (vec_oprnd);
1602
1603 if (vec_oprnds1 && vec_oprnds1->length ())
1604 {
1605 vec_oprnd = vec_oprnds1->pop ();
1606 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1607 vec_oprnds1->quick_push (vec_oprnd);
1608 }
1609 }
1610
1611
1612 /* Get vectorized definitions for OP0 and OP1.
1613 REDUC_INDEX is the index of reduction operand in case of reduction,
1614 and -1 otherwise. */
1615
1616 void
1617 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1618 vec<tree> *vec_oprnds0,
1619 vec<tree> *vec_oprnds1,
1620 slp_tree slp_node, int reduc_index)
1621 {
1622 if (slp_node)
1623 {
1624 int nops = (op1 == NULL_TREE) ? 1 : 2;
1625 auto_vec<tree> ops (nops);
1626 auto_vec<vec<tree> > vec_defs (nops);
1627
1628 ops.quick_push (op0);
1629 if (op1)
1630 ops.quick_push (op1);
1631
1632 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1633
1634 *vec_oprnds0 = vec_defs[0];
1635 if (op1)
1636 *vec_oprnds1 = vec_defs[1];
1637 }
1638 else
1639 {
1640 tree vec_oprnd;
1641
1642 vec_oprnds0->create (1);
1643 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1644 vec_oprnds0->quick_push (vec_oprnd);
1645
1646 if (op1)
1647 {
1648 vec_oprnds1->create (1);
1649 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1650 vec_oprnds1->quick_push (vec_oprnd);
1651 }
1652 }
1653 }
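
/* A typical unary, non-SLP, non-reduction use is a call such as

     vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                        NULL, -1);

   which creates a single vector def for OP0 and leaves VEC_OPRNDS1
   untouched.  (Hypothetical call shown for illustration only.)  */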
1654
1655
1656 /* Function vect_finish_stmt_generation.
1657
1658 Insert a new stmt. */
1659
1660 void
1661 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1662 gimple_stmt_iterator *gsi)
1663 {
1664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1665 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1666 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1667
1668 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1669
1670 if (!gsi_end_p (*gsi)
1671 && gimple_has_mem_ops (vec_stmt))
1672 {
1673 gimple at_stmt = gsi_stmt (*gsi);
1674 tree vuse = gimple_vuse (at_stmt);
1675 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1676 {
1677 tree vdef = gimple_vdef (at_stmt);
1678 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1679 /* If we have an SSA vuse and insert a store, update virtual
1680 SSA form to avoid triggering the renamer. Do so only
1681 if we can easily see all uses - which is what almost always
1682 happens with the way vectorized stmts are inserted. */
1683 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1684 && ((is_gimple_assign (vec_stmt)
1685 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1686 || (is_gimple_call (vec_stmt)
1687 && !(gimple_call_flags (vec_stmt)
1688 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1689 {
1690 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1691 gimple_set_vdef (vec_stmt, new_vdef);
1692 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1693 }
1694 }
1695 }
1696 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1697
1698 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1699 bb_vinfo));
1700
1701 if (dump_enabled_p ())
1702 {
1703 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1705 dump_printf (MSG_NOTE, "\n");
1706 }
1707
1708 gimple_set_location (vec_stmt, gimple_location (stmt));
1709
1710 /* While EH edges will generally prevent vectorization, stmt might
1711 e.g. be in a must-not-throw region. Ensure newly created stmts
1712 that could throw are part of the same region. */
1713 int lp_nr = lookup_stmt_eh_lp (stmt);
1714 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1715 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1716 }
1717
1718 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1719 a function declaration if the target has a vectorized version
1720 of the function, or NULL_TREE if the function cannot be vectorized. */
1721
1722 tree
1723 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1724 {
1725 tree fndecl = gimple_call_fndecl (call);
1726
1727 /* We only handle functions that do not read or clobber memory -- i.e.
1728 const or novops ones. */
1729 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1730 return NULL_TREE;
1731
1732 if (!fndecl
1733 || TREE_CODE (fndecl) != FUNCTION_DECL
1734 || !DECL_BUILT_IN (fndecl))
1735 return NULL_TREE;
1736
1737 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1738 vectype_in);
1739 }
1740
1741
1742 static tree permute_vec_elements (tree, tree, tree, gimple,
1743 gimple_stmt_iterator *);
1744
1745
1746 /* Function vectorizable_mask_load_store.
1747
1748 Check if STMT performs a conditional load or store that can be vectorized.
1749 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1750 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1751 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1752
1753 static bool
1754 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1755 gimple *vec_stmt, slp_tree slp_node)
1756 {
1757 tree vec_dest = NULL;
1758 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1759 stmt_vec_info prev_stmt_info;
1760 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1761 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1762 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1763 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1764 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1765 tree elem_type;
1766 gimple new_stmt;
1767 tree dummy;
1768 tree dataref_ptr = NULL_TREE;
1769 gimple ptr_incr;
1770 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1771 int ncopies;
1772 int i, j;
1773 bool inv_p;
1774 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1775 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1776 int gather_scale = 1;
1777 enum vect_def_type gather_dt = vect_unknown_def_type;
1778 bool is_store;
1779 tree mask;
1780 gimple def_stmt;
1781 tree def;
1782 enum vect_def_type dt;
1783
1784 if (slp_node != NULL)
1785 return false;
1786
1787 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1788 gcc_assert (ncopies >= 1);
1789
1790 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1791 mask = gimple_call_arg (stmt, 2);
1792 if (TYPE_PRECISION (TREE_TYPE (mask))
1793 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1794 return false;
1795
1796 /* FORNOW. This restriction should be relaxed. */
1797 if (nested_in_vect_loop && ncopies > 1)
1798 {
1799 if (dump_enabled_p ())
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "multiple types in nested loop.\n");
1802 return false;
1803 }
1804
1805 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1806 return false;
1807
1808 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1809 return false;
1810
1811 if (!STMT_VINFO_DATA_REF (stmt_info))
1812 return false;
1813
1814 elem_type = TREE_TYPE (vectype);
1815
1816 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1817 return false;
1818
1819 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1820 return false;
1821
1822 if (STMT_VINFO_GATHER_P (stmt_info))
1823 {
1824 gimple def_stmt;
1825 tree def;
1826 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1827 &gather_off, &gather_scale);
1828 gcc_assert (gather_decl);
1829 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1830 &def_stmt, &def, &gather_dt,
1831 &gather_off_vectype))
1832 {
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "gather index use not simple.\n");
1836 return false;
1837 }
1838
1839 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1840 tree masktype
1841 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1842 if (TREE_CODE (masktype) == INTEGER_TYPE)
1843 {
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "masked gather with integer mask not supported.\n");
1847 return false;
1848 }
1849 }
1850 else if (tree_int_cst_compare (nested_in_vect_loop
1851 ? STMT_VINFO_DR_STEP (stmt_info)
1852 : DR_STEP (dr), size_zero_node) <= 0)
1853 return false;
1854 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1855 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1856 return false;
1857
1858 if (TREE_CODE (mask) != SSA_NAME)
1859 return false;
1860
1861 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1862 &def_stmt, &def, &dt))
1863 return false;
1864
1865 if (is_store)
1866 {
1867 tree rhs = gimple_call_arg (stmt, 3);
1868 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1869 &def_stmt, &def, &dt))
1870 return false;
1871 }
1872
1873 if (!vec_stmt) /* transformation not required. */
1874 {
1875 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1876 if (is_store)
1877 vect_model_store_cost (stmt_info, ncopies, false, dt,
1878 NULL, NULL, NULL);
1879 else
1880 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1881 return true;
1882 }
1883
1884 /** Transform. **/
1885
1886 if (STMT_VINFO_GATHER_P (stmt_info))
1887 {
1888 tree vec_oprnd0 = NULL_TREE, op;
1889 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1890 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1891 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1892 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1893 tree mask_perm_mask = NULL_TREE;
1894 edge pe = loop_preheader_edge (loop);
1895 gimple_seq seq;
1896 basic_block new_bb;
1897 enum { NARROW, NONE, WIDEN } modifier;
1898 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1899
1900 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1901 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1902 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1903 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1904 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1905 scaletype = TREE_VALUE (arglist);
1906 gcc_checking_assert (types_compatible_p (srctype, rettype)
1907 && types_compatible_p (srctype, masktype));
1908
1909 if (nunits == gather_off_nunits)
1910 modifier = NONE;
1911 else if (nunits == gather_off_nunits / 2)
1912 {
1913 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1914 modifier = WIDEN;
1915
1916 for (i = 0; i < gather_off_nunits; ++i)
1917 sel[i] = i | nunits;
1918
1919 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1920 gcc_assert (perm_mask != NULL_TREE);
1921 }
1922 else if (nunits == gather_off_nunits * 2)
1923 {
1924 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1925 modifier = NARROW;
1926
1927 for (i = 0; i < nunits; ++i)
1928 sel[i] = i < gather_off_nunits
1929 ? i : i + nunits - gather_off_nunits;
1930
1931 perm_mask = vect_gen_perm_mask (vectype, sel);
1932 gcc_assert (perm_mask != NULL_TREE);
1933 ncopies *= 2;
1934 for (i = 0; i < nunits; ++i)
1935 sel[i] = i | gather_off_nunits;
1936 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1937 gcc_assert (mask_perm_mask != NULL_TREE);
1938 }
1939 else
1940 gcc_unreachable ();
1941
1942 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1943
1944 ptr = fold_convert (ptrtype, gather_base);
1945 if (!is_gimple_min_invariant (ptr))
1946 {
1947 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1948 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1949 gcc_assert (!new_bb);
1950 }
1951
1952 scale = build_int_cst (scaletype, gather_scale);
1953
1954 prev_stmt_info = NULL;
1955 for (j = 0; j < ncopies; ++j)
1956 {
1957 if (modifier == WIDEN && (j & 1))
1958 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1959 perm_mask, stmt, gsi);
1960 else if (j == 0)
1961 op = vec_oprnd0
1962 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1963 else
1964 op = vec_oprnd0
1965 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1966
1967 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1968 {
1969 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1970 == TYPE_VECTOR_SUBPARTS (idxtype));
1971 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1972 var = make_ssa_name (var, NULL);
1973 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1974 new_stmt
1975 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1976 op, NULL_TREE);
1977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1978 op = var;
1979 }
1980
1981 if (mask_perm_mask && (j & 1))
1982 mask_op = permute_vec_elements (mask_op, mask_op,
1983 mask_perm_mask, stmt, gsi);
1984 else
1985 {
1986 if (j == 0)
1987 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1988 else
1989 {
1990 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1991 &def_stmt, &def, &dt);
1992 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1993 }
1994
1995 mask_op = vec_mask;
1996 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1997 {
1998 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1999 == TYPE_VECTOR_SUBPARTS (masktype));
2000 var = vect_get_new_vect_var (masktype, vect_simple_var,
2001 NULL);
2002 var = make_ssa_name (var, NULL);
2003 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2004 new_stmt
2005 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
2006 mask_op, NULL_TREE);
2007 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2008 mask_op = var;
2009 }
2010 }
2011
2012 new_stmt
2013 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2014 scale);
2015
2016 if (!useless_type_conversion_p (vectype, rettype))
2017 {
2018 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2019 == TYPE_VECTOR_SUBPARTS (rettype));
2020 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2021 op = make_ssa_name (var, new_stmt);
2022 gimple_call_set_lhs (new_stmt, op);
2023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2024 var = make_ssa_name (vec_dest, NULL);
2025 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2026 new_stmt
2027 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2028 NULL_TREE);
2029 }
2030 else
2031 {
2032 var = make_ssa_name (vec_dest, new_stmt);
2033 gimple_call_set_lhs (new_stmt, var);
2034 }
2035
2036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2037
2038 if (modifier == NARROW)
2039 {
2040 if ((j & 1) == 0)
2041 {
2042 prev_res = var;
2043 continue;
2044 }
2045 var = permute_vec_elements (prev_res, var,
2046 perm_mask, stmt, gsi);
2047 new_stmt = SSA_NAME_DEF_STMT (var);
2048 }
2049
2050 if (prev_stmt_info == NULL)
2051 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2052 else
2053 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2054 prev_stmt_info = vinfo_for_stmt (new_stmt);
2055 }
2056
2057 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2058 from the IL. */
2059 tree lhs = gimple_call_lhs (stmt);
2060 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2061 set_vinfo_for_stmt (new_stmt, stmt_info);
2062 set_vinfo_for_stmt (stmt, NULL);
2063 STMT_VINFO_STMT (stmt_info) = new_stmt;
2064 gsi_replace (gsi, new_stmt, true);
2065 return true;
2066 }
2067 else if (is_store)
2068 {
2069 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2070 prev_stmt_info = NULL;
2071 for (i = 0; i < ncopies; i++)
2072 {
2073 unsigned align, misalign;
2074
2075 if (i == 0)
2076 {
2077 tree rhs = gimple_call_arg (stmt, 3);
2078 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2079 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2080 /* We should have caught mismatched types earlier. */
2081 gcc_assert (useless_type_conversion_p (vectype,
2082 TREE_TYPE (vec_rhs)));
2083 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2084 NULL_TREE, &dummy, gsi,
2085 &ptr_incr, false, &inv_p);
2086 gcc_assert (!inv_p);
2087 }
2088 else
2089 {
2090 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2091 &def, &dt);
2092 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2093 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2094 &def, &dt);
2095 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2096 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2097 TYPE_SIZE_UNIT (vectype));
2098 }
2099
2100 align = TYPE_ALIGN_UNIT (vectype);
2101 if (aligned_access_p (dr))
2102 misalign = 0;
2103 else if (DR_MISALIGNMENT (dr) == -1)
2104 {
2105 align = TYPE_ALIGN_UNIT (elem_type);
2106 misalign = 0;
2107 }
2108 else
2109 misalign = DR_MISALIGNMENT (dr);
2110 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2111 misalign);
2112 new_stmt
2113 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2114 gimple_call_arg (stmt, 1),
2115 vec_mask, vec_rhs);
2116 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2117 if (i == 0)
2118 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2119 else
2120 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2121 prev_stmt_info = vinfo_for_stmt (new_stmt);
2122 }
2123 }
2124 else
2125 {
2126 tree vec_mask = NULL_TREE;
2127 prev_stmt_info = NULL;
2128 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2129 for (i = 0; i < ncopies; i++)
2130 {
2131 unsigned align, misalign;
2132
2133 if (i == 0)
2134 {
2135 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2136 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2137 NULL_TREE, &dummy, gsi,
2138 &ptr_incr, false, &inv_p);
2139 gcc_assert (!inv_p);
2140 }
2141 else
2142 {
2143 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2144 &def, &dt);
2145 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2146 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2147 TYPE_SIZE_UNIT (vectype));
2148 }
2149
2150 align = TYPE_ALIGN_UNIT (vectype);
2151 if (aligned_access_p (dr))
2152 misalign = 0;
2153 else if (DR_MISALIGNMENT (dr) == -1)
2154 {
2155 align = TYPE_ALIGN_UNIT (elem_type);
2156 misalign = 0;
2157 }
2158 else
2159 misalign = DR_MISALIGNMENT (dr);
2160 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2161 misalign);
2162 new_stmt
2163 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2164 gimple_call_arg (stmt, 1),
2165 vec_mask);
2166 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2168 if (i == 0)
2169 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2170 else
2171 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2172 prev_stmt_info = vinfo_for_stmt (new_stmt);
2173 }
2174 }
2175
2176 if (!is_store)
2177 {
2178 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2179 from the IL. */
2180 tree lhs = gimple_call_lhs (stmt);
2181 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2182 set_vinfo_for_stmt (new_stmt, stmt_info);
2183 set_vinfo_for_stmt (stmt, NULL);
2184 STMT_VINFO_STMT (stmt_info) = new_stmt;
2185 gsi_replace (gsi, new_stmt, true);
2186 }
2187
2188 return true;
2189 }
2190
2191
2192 /* Function vectorizable_call.
2193
2194 Check if STMT performs a function call that can be vectorized.
2195 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2196 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2197 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
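
/* A hedged sketch of the expected outcome (builtin chosen only for
   illustration): with a vectorization factor of 4 and a target that
   provides a V4SF variant of the builtin, a scalar call
     a_1 = __builtin_copysignf (b_2, c_3);
   is replaced by calls to the target's vector decl, one per vector
   copy, taking the vectorized defs of b_2 and c_3 as arguments.  */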
2198
2199 static bool
2200 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2201 slp_tree slp_node)
2202 {
2203 tree vec_dest;
2204 tree scalar_dest;
2205 tree op, type;
2206 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2208 tree vectype_out, vectype_in;
2209 int nunits_in;
2210 int nunits_out;
2211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2212 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2213 tree fndecl, new_temp, def, rhs_type;
2214 gimple def_stmt;
2215 enum vect_def_type dt[3]
2216 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2217 gimple new_stmt = NULL;
2218 int ncopies, j;
2219 vec<tree> vargs = vNULL;
2220 enum { NARROW, NONE, WIDEN } modifier;
2221 size_t i, nargs;
2222 tree lhs;
2223
2224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2225 return false;
2226
2227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2228 return false;
2229
2230 /* Is STMT a vectorizable call? */
2231 if (!is_gimple_call (stmt))
2232 return false;
2233
2234 if (gimple_call_internal_p (stmt)
2235 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2236 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2237 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2238 slp_node);
2239
2240 if (gimple_call_lhs (stmt) == NULL_TREE
2241 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2242 return false;
2243
2244 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2245
2246 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2247
2248 /* Process function arguments. */
2249 rhs_type = NULL_TREE;
2250 vectype_in = NULL_TREE;
2251 nargs = gimple_call_num_args (stmt);
2252
2253 /* Bail out if the function has more than three arguments; we do not have
2254 interesting builtin functions to vectorize with more than two arguments
2255 except for fma. A call with no arguments is not interesting either. */
2256 if (nargs == 0 || nargs > 3)
2257 return false;
2258
2259 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2260 if (gimple_call_internal_p (stmt)
2261 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2262 {
2263 nargs = 0;
2264 rhs_type = unsigned_type_node;
2265 }
2266
2267 for (i = 0; i < nargs; i++)
2268 {
2269 tree opvectype;
2270
2271 op = gimple_call_arg (stmt, i);
2272
2273 /* We can only handle calls with arguments of the same type. */
2274 if (rhs_type
2275 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2276 {
2277 if (dump_enabled_p ())
2278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2279 "argument types differ.\n");
2280 return false;
2281 }
2282 if (!rhs_type)
2283 rhs_type = TREE_TYPE (op);
2284
2285 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2286 &def_stmt, &def, &dt[i], &opvectype))
2287 {
2288 if (dump_enabled_p ())
2289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2290 "use not simple.\n");
2291 return false;
2292 }
2293
2294 if (!vectype_in)
2295 vectype_in = opvectype;
2296 else if (opvectype
2297 && opvectype != vectype_in)
2298 {
2299 if (dump_enabled_p ())
2300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2301 "argument vector types differ.\n");
2302 return false;
2303 }
2304 }
2305 /* If all arguments are external or constant defs, use a vector type with
2306 the same size as the output vector type. */
2307 if (!vectype_in)
2308 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2309 if (vec_stmt)
2310 gcc_assert (vectype_in);
2311 if (!vectype_in)
2312 {
2313 if (dump_enabled_p ())
2314 {
2315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2316 "no vectype for scalar type ");
2317 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2318 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2319 }
2320
2321 return false;
2322 }
2323
2324 /* FORNOW */
2325 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2326 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2327 if (nunits_in == nunits_out / 2)
2328 modifier = NARROW;
2329 else if (nunits_out == nunits_in)
2330 modifier = NONE;
2331 else if (nunits_out == nunits_in / 2)
2332 modifier = WIDEN;
2333 else
2334 return false;
2335
2336 /* For now, we only vectorize functions if a target specific builtin
2337 is available. TODO -- in some cases, it might be profitable to
2338 insert the calls for pieces of the vector, in order to be able
2339 to vectorize other operations in the loop. */
2340 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2341 if (fndecl == NULL_TREE)
2342 {
2343 if (gimple_call_internal_p (stmt)
2344 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2345 && !slp_node
2346 && loop_vinfo
2347 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2348 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2349 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2350 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2351 {
2352 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2353 { 0, 1, 2, ... vf - 1 } vector. */
2354 gcc_assert (nargs == 0);
2355 }
2356 else
2357 {
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "function is not vectorizable.\n");
2361 return false;
2362 }
2363 }
2364
2365 gcc_assert (!gimple_vuse (stmt));
2366
2367 if (slp_node || PURE_SLP_STMT (stmt_info))
2368 ncopies = 1;
2369 else if (modifier == NARROW)
2370 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2371 else
2372 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2373
2374 /* Sanity check: make sure that at least one copy of the vectorized stmt
2375 needs to be generated. */
2376 gcc_assert (ncopies >= 1);
2377
2378 if (!vec_stmt) /* transformation not required. */
2379 {
2380 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2381 if (dump_enabled_p ())
2382 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2383 "\n");
2384 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2385 return true;
2386 }
2387
2388 /** Transform. **/
2389
2390 if (dump_enabled_p ())
2391 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2392
2393 /* Handle def. */
2394 scalar_dest = gimple_call_lhs (stmt);
2395 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2396
2397 prev_stmt_info = NULL;
2398 switch (modifier)
2399 {
2400 case NONE:
2401 for (j = 0; j < ncopies; ++j)
2402 {
2403 /* Build argument list for the vectorized call. */
2404 if (j == 0)
2405 vargs.create (nargs);
2406 else
2407 vargs.truncate (0);
2408
2409 if (slp_node)
2410 {
2411 auto_vec<vec<tree> > vec_defs (nargs);
2412 vec<tree> vec_oprnds0;
2413
2414 for (i = 0; i < nargs; i++)
2415 vargs.quick_push (gimple_call_arg (stmt, i));
2416 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2417 vec_oprnds0 = vec_defs[0];
2418
2419 /* Arguments are ready. Create the new vector stmt. */
2420 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2421 {
2422 size_t k;
2423 for (k = 0; k < nargs; k++)
2424 {
2425 vec<tree> vec_oprndsk = vec_defs[k];
2426 vargs[k] = vec_oprndsk[i];
2427 }
2428 new_stmt = gimple_build_call_vec (fndecl, vargs);
2429 new_temp = make_ssa_name (vec_dest, new_stmt);
2430 gimple_call_set_lhs (new_stmt, new_temp);
2431 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2432 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2433 }
2434
2435 for (i = 0; i < nargs; i++)
2436 {
2437 vec<tree> vec_oprndsi = vec_defs[i];
2438 vec_oprndsi.release ();
2439 }
2440 continue;
2441 }
2442
2443 for (i = 0; i < nargs; i++)
2444 {
2445 op = gimple_call_arg (stmt, i);
2446 if (j == 0)
2447 vec_oprnd0
2448 = vect_get_vec_def_for_operand (op, stmt, NULL);
2449 else
2450 {
2451 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2452 vec_oprnd0
2453 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2454 }
2455
2456 vargs.quick_push (vec_oprnd0);
2457 }
2458
2459 if (gimple_call_internal_p (stmt)
2460 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2461 {
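/* IFN_GOMP_SIMD_LANE yields the current SIMD lane number, so copy J of
   the vectorized call is simply the constant vector
   { J*nunits_out, J*nunits_out + 1, ..., J*nunits_out + nunits_out - 1 },
   materialized through a temporary below.  */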
2462 tree *v = XALLOCAVEC (tree, nunits_out);
2463 int k;
2464 for (k = 0; k < nunits_out; ++k)
2465 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2466 tree cst = build_vector (vectype_out, v);
2467 tree new_var
2468 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2469 gimple init_stmt = gimple_build_assign (new_var, cst);
2470 new_temp = make_ssa_name (new_var, init_stmt);
2471 gimple_assign_set_lhs (init_stmt, new_temp);
2472 vect_init_vector_1 (stmt, init_stmt, NULL);
2473 new_temp = make_ssa_name (vec_dest, NULL);
2474 new_stmt = gimple_build_assign (new_temp,
2475 gimple_assign_lhs (init_stmt));
2476 }
2477 else
2478 {
2479 new_stmt = gimple_build_call_vec (fndecl, vargs);
2480 new_temp = make_ssa_name (vec_dest, new_stmt);
2481 gimple_call_set_lhs (new_stmt, new_temp);
2482 }
2483 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2484
2485 if (j == 0)
2486 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2487 else
2488 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2489
2490 prev_stmt_info = vinfo_for_stmt (new_stmt);
2491 }
2492
2493 break;
2494
2495 case NARROW:
2496 for (j = 0; j < ncopies; ++j)
2497 {
2498 /* Build argument list for the vectorized call. */
2499 if (j == 0)
2500 vargs.create (nargs * 2);
2501 else
2502 vargs.truncate (0);
2503
2504 if (slp_node)
2505 {
2506 auto_vec<vec<tree> > vec_defs (nargs);
2507 vec<tree> vec_oprnds0;
2508
2509 for (i = 0; i < nargs; i++)
2510 vargs.quick_push (gimple_call_arg (stmt, i));
2511 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2512 vec_oprnds0 = vec_defs[0];
2513
2514 /* Arguments are ready. Create the new vector stmt. */
2515 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2516 {
2517 size_t k;
2518 vargs.truncate (0);
2519 for (k = 0; k < nargs; k++)
2520 {
2521 vec<tree> vec_oprndsk = vec_defs[k];
2522 vargs.quick_push (vec_oprndsk[i]);
2523 vargs.quick_push (vec_oprndsk[i + 1]);
2524 }
2525 new_stmt = gimple_build_call_vec (fndecl, vargs);
2526 new_temp = make_ssa_name (vec_dest, new_stmt);
2527 gimple_call_set_lhs (new_stmt, new_temp);
2528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2529 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2530 }
2531
2532 for (i = 0; i < nargs; i++)
2533 {
2534 vec<tree> vec_oprndsi = vec_defs[i];
2535 vec_oprndsi.release ();
2536 }
2537 continue;
2538 }
2539
2540 for (i = 0; i < nargs; i++)
2541 {
2542 op = gimple_call_arg (stmt, i);
2543 if (j == 0)
2544 {
2545 vec_oprnd0
2546 = vect_get_vec_def_for_operand (op, stmt, NULL);
2547 vec_oprnd1
2548 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2549 }
2550 else
2551 {
2552 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2553 vec_oprnd0
2554 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2555 vec_oprnd1
2556 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2557 }
2558
2559 vargs.quick_push (vec_oprnd0);
2560 vargs.quick_push (vec_oprnd1);
2561 }
2562
2563 new_stmt = gimple_build_call_vec (fndecl, vargs);
2564 new_temp = make_ssa_name (vec_dest, new_stmt);
2565 gimple_call_set_lhs (new_stmt, new_temp);
2566 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2567
2568 if (j == 0)
2569 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2570 else
2571 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2572
2573 prev_stmt_info = vinfo_for_stmt (new_stmt);
2574 }
2575
2576 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2577
2578 break;
2579
2580 case WIDEN:
2581 /* No current target implements this case. */
2582 return false;
2583 }
2584
2585 vargs.release ();
2586
2587 /* The call in STMT might prevent it from being removed in DCE.
2588 However, we cannot remove it here, due to the way the SSA name
2589 it defines is mapped to the new definition. So just replace the
2590 rhs of the statement with something harmless. */
2591
2592 if (slp_node)
2593 return true;
2594
2595 type = TREE_TYPE (scalar_dest);
2596 if (is_pattern_stmt_p (stmt_info))
2597 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2598 else
2599 lhs = gimple_call_lhs (stmt);
2600 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2601 set_vinfo_for_stmt (new_stmt, stmt_info);
2602 set_vinfo_for_stmt (stmt, NULL);
2603 STMT_VINFO_STMT (stmt_info) = new_stmt;
2604 gsi_replace (gsi, new_stmt, false);
2605
2606 return true;
2607 }
2608
2609
2610 struct simd_call_arg_info
2611 {
2612 tree vectype;
2613 tree op;
2614 enum vect_def_type dt;
2615 HOST_WIDE_INT linear_step;
2616 unsigned int align;
2617 };
2618
2619 /* Function vectorizable_simd_clone_call.
2620
2621 Check if STMT performs a function call that can be vectorized
2622 by calling a simd clone of the function.
2623 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2624 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2625 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
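
/* A sketch of the intent, assuming the usual OpenMP-style source: for
     #pragma omp declare simd
     float foo (float x);
   the compiler records simd clones on foo's cgraph node; the code below
   picks the cheapest usable clone (matching simdlen, argument kinds and
   alignment) and calls it with vector arguments in place of scalar
   foo.  */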
2626
2627 static bool
2628 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2629 gimple *vec_stmt, slp_tree slp_node)
2630 {
2631 tree vec_dest;
2632 tree scalar_dest;
2633 tree op, type;
2634 tree vec_oprnd0 = NULL_TREE;
2635 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2636 tree vectype;
2637 unsigned int nunits;
2638 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2640 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2641 tree fndecl, new_temp, def;
2642 gimple def_stmt;
2643 gimple new_stmt = NULL;
2644 int ncopies, j;
2645 vec<simd_call_arg_info> arginfo = vNULL;
2646 vec<tree> vargs = vNULL;
2647 size_t i, nargs;
2648 tree lhs, rtype, ratype;
2649 vec<constructor_elt, va_gc> *ret_ctor_elts;
2650
2651 /* Is STMT a vectorizable call? */
2652 if (!is_gimple_call (stmt))
2653 return false;
2654
2655 fndecl = gimple_call_fndecl (stmt);
2656 if (fndecl == NULL_TREE)
2657 return false;
2658
2659 struct cgraph_node *node = cgraph_node::get (fndecl);
2660 if (node == NULL || node->simd_clones == NULL)
2661 return false;
2662
2663 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2664 return false;
2665
2666 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2667 return false;
2668
2669 if (gimple_call_lhs (stmt)
2670 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2671 return false;
2672
2673 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2674
2675 vectype = STMT_VINFO_VECTYPE (stmt_info);
2676
2677 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2678 return false;
2679
2680 /* FORNOW */
2681 if (slp_node || PURE_SLP_STMT (stmt_info))
2682 return false;
2683
2684 /* Process function arguments. */
2685 nargs = gimple_call_num_args (stmt);
2686
2687 /* Bail out if the function has zero arguments. */
2688 if (nargs == 0)
2689 return false;
2690
2691 arginfo.create (nargs);
2692
2693 for (i = 0; i < nargs; i++)
2694 {
2695 simd_call_arg_info thisarginfo;
2696 affine_iv iv;
2697
2698 thisarginfo.linear_step = 0;
2699 thisarginfo.align = 0;
2700 thisarginfo.op = NULL_TREE;
2701
2702 op = gimple_call_arg (stmt, i);
2703 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2704 &def_stmt, &def, &thisarginfo.dt,
2705 &thisarginfo.vectype)
2706 || thisarginfo.dt == vect_uninitialized_def)
2707 {
2708 if (dump_enabled_p ())
2709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2710 "use not simple.\n");
2711 arginfo.release ();
2712 return false;
2713 }
2714
2715 if (thisarginfo.dt == vect_constant_def
2716 || thisarginfo.dt == vect_external_def)
2717 gcc_assert (thisarginfo.vectype == NULL_TREE);
2718 else
2719 gcc_assert (thisarginfo.vectype != NULL_TREE);
2720
2721 if (thisarginfo.dt != vect_constant_def
2722 && thisarginfo.dt != vect_external_def
2723 && loop_vinfo
2724 && TREE_CODE (op) == SSA_NAME
2725 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2726 && tree_fits_shwi_p (iv.step))
2727 {
2728 thisarginfo.linear_step = tree_to_shwi (iv.step);
2729 thisarginfo.op = iv.base;
2730 }
2731 else if ((thisarginfo.dt == vect_constant_def
2732 || thisarginfo.dt == vect_external_def)
2733 && POINTER_TYPE_P (TREE_TYPE (op)))
2734 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2735
2736 arginfo.quick_push (thisarginfo);
2737 }
2738
2739 unsigned int badness = 0;
2740 struct cgraph_node *bestn = NULL;
2741 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2742 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2743 else
2744 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2745 n = n->simdclone->next_clone)
2746 {
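/* Score clone N: clones whose simdlen exceeds the vectorization factor,
   whose argument kinds do not match, or that are in-branch (FORNOW) are
   skipped; shorter simdlens, target reluctance and vector arguments fed
   from invariants or linear IVs are penalized.  The clone with the
   lowest badness wins.  */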
2747 unsigned int this_badness = 0;
2748 if (n->simdclone->simdlen
2749 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2750 || n->simdclone->nargs != nargs)
2751 continue;
2752 if (n->simdclone->simdlen
2753 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2754 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2755 - exact_log2 (n->simdclone->simdlen)) * 1024;
2756 if (n->simdclone->inbranch)
2757 this_badness += 2048;
2758 int target_badness = targetm.simd_clone.usable (n);
2759 if (target_badness < 0)
2760 continue;
2761 this_badness += target_badness * 512;
2762 /* FORNOW: Have to add code to add the mask argument. */
2763 if (n->simdclone->inbranch)
2764 continue;
2765 for (i = 0; i < nargs; i++)
2766 {
2767 switch (n->simdclone->args[i].arg_type)
2768 {
2769 case SIMD_CLONE_ARG_TYPE_VECTOR:
2770 if (!useless_type_conversion_p
2771 (n->simdclone->args[i].orig_type,
2772 TREE_TYPE (gimple_call_arg (stmt, i))))
2773 i = -1;
2774 else if (arginfo[i].dt == vect_constant_def
2775 || arginfo[i].dt == vect_external_def
2776 || arginfo[i].linear_step)
2777 this_badness += 64;
2778 break;
2779 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2780 if (arginfo[i].dt != vect_constant_def
2781 && arginfo[i].dt != vect_external_def)
2782 i = -1;
2783 break;
2784 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2785 if (arginfo[i].dt == vect_constant_def
2786 || arginfo[i].dt == vect_external_def
2787 || (arginfo[i].linear_step
2788 != n->simdclone->args[i].linear_step))
2789 i = -1;
2790 break;
2791 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2792 /* FORNOW */
2793 i = -1;
2794 break;
2795 case SIMD_CLONE_ARG_TYPE_MASK:
2796 gcc_unreachable ();
2797 }
2798 if (i == (size_t) -1)
2799 break;
2800 if (n->simdclone->args[i].alignment > arginfo[i].align)
2801 {
2802 i = -1;
2803 break;
2804 }
2805 if (arginfo[i].align)
2806 this_badness += (exact_log2 (arginfo[i].align)
2807 - exact_log2 (n->simdclone->args[i].alignment));
2808 }
2809 if (i == (size_t) -1)
2810 continue;
2811 if (bestn == NULL || this_badness < badness)
2812 {
2813 bestn = n;
2814 badness = this_badness;
2815 }
2816 }
2817
2818 if (bestn == NULL)
2819 {
2820 arginfo.release ();
2821 return false;
2822 }
2823
2824 for (i = 0; i < nargs; i++)
2825 if ((arginfo[i].dt == vect_constant_def
2826 || arginfo[i].dt == vect_external_def)
2827 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2828 {
2829 arginfo[i].vectype
2830 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2831 i)));
2832 if (arginfo[i].vectype == NULL
2833 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2834 > bestn->simdclone->simdlen))
2835 {
2836 arginfo.release ();
2837 return false;
2838 }
2839 }
2840
2841 fndecl = bestn->decl;
2842 nunits = bestn->simdclone->simdlen;
2843 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2844
2845 /* If the function isn't const, only allow it in simd loops where the user
2846 has asserted that at least nunits consecutive iterations can be
2847 performed using SIMD instructions. */
2848 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2849 && gimple_vuse (stmt))
2850 {
2851 arginfo.release ();
2852 return false;
2853 }
2854
2855 /* Sanity check: make sure that at least one copy of the vectorized stmt
2856 needs to be generated. */
2857 gcc_assert (ncopies >= 1);
2858
2859 if (!vec_stmt) /* transformation not required. */
2860 {
2861 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2862 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2863 if (dump_enabled_p ())
2864 dump_printf_loc (MSG_NOTE, vect_location,
2865 "=== vectorizable_simd_clone_call ===\n");
2866 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2867 arginfo.release ();
2868 return true;
2869 }
2870
2871 /** Transform. **/
2872
2873 if (dump_enabled_p ())
2874 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2875
2876 /* Handle def. */
2877 scalar_dest = gimple_call_lhs (stmt);
2878 vec_dest = NULL_TREE;
2879 rtype = NULL_TREE;
2880 ratype = NULL_TREE;
2881 if (scalar_dest)
2882 {
2883 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2884 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2885 if (TREE_CODE (rtype) == ARRAY_TYPE)
2886 {
2887 ratype = rtype;
2888 rtype = TREE_TYPE (ratype);
2889 }
2890 }
2891
2892 prev_stmt_info = NULL;
2893 for (j = 0; j < ncopies; ++j)
2894 {
2895 /* Build argument list for the vectorized call. */
2896 if (j == 0)
2897 vargs.create (nargs);
2898 else
2899 vargs.truncate (0);
2900
2901 for (i = 0; i < nargs; i++)
2902 {
2903 unsigned int k, l, m, o;
2904 tree atype;
2905 op = gimple_call_arg (stmt, i);
2906 switch (bestn->simdclone->args[i].arg_type)
2907 {
2908 case SIMD_CLONE_ARG_TYPE_VECTOR:
2909 atype = bestn->simdclone->args[i].vector_type;
2910 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2911 for (m = j * o; m < (j + 1) * o; m++)
2912 {
2913 if (TYPE_VECTOR_SUBPARTS (atype)
2914 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2915 {
2916 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2917 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2918 / TYPE_VECTOR_SUBPARTS (atype));
2919 gcc_assert ((k & (k - 1)) == 0);
2920 if (m == 0)
2921 vec_oprnd0
2922 = vect_get_vec_def_for_operand (op, stmt, NULL);
2923 else
2924 {
2925 vec_oprnd0 = arginfo[i].op;
2926 if ((m & (k - 1)) == 0)
2927 vec_oprnd0
2928 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2929 vec_oprnd0);
2930 }
2931 arginfo[i].op = vec_oprnd0;
2932 vec_oprnd0
2933 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2934 size_int (prec),
2935 bitsize_int ((m & (k - 1)) * prec));
2936 new_stmt
2937 = gimple_build_assign (make_ssa_name (atype, NULL),
2938 vec_oprnd0);
2939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2940 vargs.safe_push (gimple_assign_lhs (new_stmt));
2941 }
2942 else
2943 {
2944 k = (TYPE_VECTOR_SUBPARTS (atype)
2945 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2946 gcc_assert ((k & (k - 1)) == 0);
2947 vec<constructor_elt, va_gc> *ctor_elts;
2948 if (k != 1)
2949 vec_alloc (ctor_elts, k);
2950 else
2951 ctor_elts = NULL;
2952 for (l = 0; l < k; l++)
2953 {
2954 if (m == 0 && l == 0)
2955 vec_oprnd0
2956 = vect_get_vec_def_for_operand (op, stmt, NULL);
2957 else
2958 vec_oprnd0
2959 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2960 arginfo[i].op);
2961 arginfo[i].op = vec_oprnd0;
2962 if (k == 1)
2963 break;
2964 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2965 vec_oprnd0);
2966 }
2967 if (k == 1)
2968 vargs.safe_push (vec_oprnd0);
2969 else
2970 {
2971 vec_oprnd0 = build_constructor (atype, ctor_elts);
2972 new_stmt
2973 = gimple_build_assign (make_ssa_name (atype, NULL),
2974 vec_oprnd0);
2975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2976 vargs.safe_push (gimple_assign_lhs (new_stmt));
2977 }
2978 }
2979 }
2980 break;
2981 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2982 vargs.safe_push (op);
2983 break;
2984 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2985 if (j == 0)
2986 {
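/* For a linear argument, the first copy materializes the base value in
   the loop preheader and creates a loop-header PHI advanced by
   linear_step * ncopies * nunits each iteration; later copies (the else
   arm below) just add linear_step * j * nunits to the PHI result.  */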
2987 gimple_seq stmts;
2988 arginfo[i].op
2989 = force_gimple_operand (arginfo[i].op, &stmts, true,
2990 NULL_TREE);
2991 if (stmts != NULL)
2992 {
2993 basic_block new_bb;
2994 edge pe = loop_preheader_edge (loop);
2995 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2996 gcc_assert (!new_bb);
2997 }
2998 tree phi_res = copy_ssa_name (op, NULL);
2999 gimple new_phi = create_phi_node (phi_res, loop->header);
3000 set_vinfo_for_stmt (new_phi,
3001 new_stmt_vec_info (new_phi, loop_vinfo,
3002 NULL));
3003 add_phi_arg (new_phi, arginfo[i].op,
3004 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3005 enum tree_code code
3006 = POINTER_TYPE_P (TREE_TYPE (op))
3007 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3008 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3009 ? sizetype : TREE_TYPE (op);
3010 widest_int cst
3011 = wi::mul (bestn->simdclone->args[i].linear_step,
3012 ncopies * nunits);
3013 tree tcst = wide_int_to_tree (type, cst);
3014 tree phi_arg = copy_ssa_name (op, NULL);
3015 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3016 phi_res, tcst);
3017 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3018 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3019 set_vinfo_for_stmt (new_stmt,
3020 new_stmt_vec_info (new_stmt, loop_vinfo,
3021 NULL));
3022 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3023 UNKNOWN_LOCATION);
3024 arginfo[i].op = phi_res;
3025 vargs.safe_push (phi_res);
3026 }
3027 else
3028 {
3029 enum tree_code code
3030 = POINTER_TYPE_P (TREE_TYPE (op))
3031 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3032 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3033 ? sizetype : TREE_TYPE (op);
3034 widest_int cst
3035 = wi::mul (bestn->simdclone->args[i].linear_step,
3036 j * nunits);
3037 tree tcst = wide_int_to_tree (type, cst);
3038 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3039 new_stmt
3040 = gimple_build_assign_with_ops (code, new_temp,
3041 arginfo[i].op, tcst);
3042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3043 vargs.safe_push (new_temp);
3044 }
3045 break;
3046 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3047 default:
3048 gcc_unreachable ();
3049 }
3050 }
3051
3052 new_stmt = gimple_build_call_vec (fndecl, vargs);
3053 if (vec_dest)
3054 {
3055 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3056 if (ratype)
3057 new_temp = create_tmp_var (ratype, NULL);
3058 else if (TYPE_VECTOR_SUBPARTS (vectype)
3059 == TYPE_VECTOR_SUBPARTS (rtype))
3060 new_temp = make_ssa_name (vec_dest, new_stmt);
3061 else
3062 new_temp = make_ssa_name (rtype, new_stmt);
3063 gimple_call_set_lhs (new_stmt, new_temp);
3064 }
3065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3066
3067 if (vec_dest)
3068 {
3069 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3070 {
3071 unsigned int k, l;
3072 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3073 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3074 gcc_assert ((k & (k - 1)) == 0);
3075 for (l = 0; l < k; l++)
3076 {
3077 tree t;
3078 if (ratype)
3079 {
3080 t = build_fold_addr_expr (new_temp);
3081 t = build2 (MEM_REF, vectype, t,
3082 build_int_cst (TREE_TYPE (t),
3083 l * prec / BITS_PER_UNIT));
3084 }
3085 else
3086 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3087 size_int (prec), bitsize_int (l * prec));
3088 new_stmt
3089 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3090 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3091 if (j == 0 && l == 0)
3092 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3093 else
3094 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3095
3096 prev_stmt_info = vinfo_for_stmt (new_stmt);
3097 }
3098
3099 if (ratype)
3100 {
3101 tree clobber = build_constructor (ratype, NULL);
3102 TREE_THIS_VOLATILE (clobber) = 1;
3103 new_stmt = gimple_build_assign (new_temp, clobber);
3104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3105 }
3106 continue;
3107 }
3108 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3109 {
3110 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3111 / TYPE_VECTOR_SUBPARTS (rtype));
3112 gcc_assert ((k & (k - 1)) == 0);
3113 if ((j & (k - 1)) == 0)
3114 vec_alloc (ret_ctor_elts, k);
3115 if (ratype)
3116 {
3117 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3118 for (m = 0; m < o; m++)
3119 {
3120 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3121 size_int (m), NULL_TREE, NULL_TREE);
3122 new_stmt
3123 = gimple_build_assign (make_ssa_name (rtype, NULL),
3124 tem);
3125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3126 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3127 gimple_assign_lhs (new_stmt));
3128 }
3129 tree clobber = build_constructor (ratype, NULL);
3130 TREE_THIS_VOLATILE (clobber) = 1;
3131 new_stmt = gimple_build_assign (new_temp, clobber);
3132 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3133 }
3134 else
3135 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3136 if ((j & (k - 1)) != k - 1)
3137 continue;
3138 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3139 new_stmt
3140 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3141 vec_oprnd0);
3142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3143
3144 if ((unsigned) j == k - 1)
3145 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3146 else
3147 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3148
3149 prev_stmt_info = vinfo_for_stmt (new_stmt);
3150 continue;
3151 }
3152 else if (ratype)
3153 {
3154 tree t = build_fold_addr_expr (new_temp);
3155 t = build2 (MEM_REF, vectype, t,
3156 build_int_cst (TREE_TYPE (t), 0));
3157 new_stmt
3158 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 tree clobber = build_constructor (ratype, NULL);
3161 TREE_THIS_VOLATILE (clobber) = 1;
3162 vect_finish_stmt_generation (stmt,
3163 gimple_build_assign (new_temp,
3164 clobber), gsi);
3165 }
3166 }
3167
3168 if (j == 0)
3169 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3170 else
3171 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3172
3173 prev_stmt_info = vinfo_for_stmt (new_stmt);
3174 }
3175
3176 vargs.release ();
3177
3178 /* The call in STMT might prevent it from being removed in DCE.
3179 However, we cannot remove it here, due to the way the SSA name
3180 it defines is mapped to the new definition. So just replace the
3181 rhs of the statement with something harmless. */
3182
3183 if (slp_node)
3184 return true;
3185
3186 if (scalar_dest)
3187 {
3188 type = TREE_TYPE (scalar_dest);
3189 if (is_pattern_stmt_p (stmt_info))
3190 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3191 else
3192 lhs = gimple_call_lhs (stmt);
3193 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3194 }
3195 else
3196 new_stmt = gimple_build_nop ();
3197 set_vinfo_for_stmt (new_stmt, stmt_info);
3198 set_vinfo_for_stmt (stmt, NULL);
3199 STMT_VINFO_STMT (stmt_info) = new_stmt;
3200 gsi_replace (gsi, new_stmt, false);
3201 unlink_stmt_vdef (stmt);
3202
3203 return true;
3204 }
3205
3206
3207 /* Function vect_gen_widened_results_half
3208
3209 Create a vector stmt whose code, number of operands, and result
3210 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3211 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3212 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3213 needs to be created (DECL is a function-decl of a target-builtin).
3214 STMT is the original scalar stmt that we are vectorizing. */
3215
3216 static gimple
3217 vect_gen_widened_results_half (enum tree_code code,
3218 tree decl,
3219 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3220 tree vec_dest, gimple_stmt_iterator *gsi,
3221 gimple stmt)
3222 {
3223 gimple new_stmt;
3224 tree new_temp;
3225
3226 /* Generate half of the widened result: */
3227 if (code == CALL_EXPR)
3228 {
3229 /* Target specific support */
3230 if (op_type == binary_op)
3231 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3232 else
3233 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3234 new_temp = make_ssa_name (vec_dest, new_stmt);
3235 gimple_call_set_lhs (new_stmt, new_temp);
3236 }
3237 else
3238 {
3239 /* Generic support */
3240 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3241 if (op_type != binary_op)
3242 vec_oprnd1 = NULL;
3243 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3244 vec_oprnd1);
3245 new_temp = make_ssa_name (vec_dest, new_stmt);
3246 gimple_assign_set_lhs (new_stmt, new_temp);
3247 }
3248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3249
3250 return new_stmt;
3251 }
3252
3253
3254 /* Get vectorized definitions for loop-based vectorization. For the first
3255 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3256 scalar operand), and for the rest we get a copy with
3257 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3258 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3259 The vectors are collected into VEC_OPRNDS. */
3260
3261 static void
3262 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3263 vec<tree> *vec_oprnds, int multi_step_cvt)
3264 {
3265 tree vec_oprnd;
3266
3267 /* Get first vector operand. */
3268 /* All the vector operands except the very first one (which is the scalar oprnd)
3269 are stmt copies. */
3270 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3271 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3272 else
3273 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3274
3275 vec_oprnds->quick_push (vec_oprnd);
3276
3277 /* Get second vector operand. */
3278 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3279 vec_oprnds->quick_push (vec_oprnd);
3280
3281 *oprnd = vec_oprnd;
3282
3283 /* For conversion in multiple steps, continue to get operands
3284 recursively. */
3285 if (multi_step_cvt)
3286 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3287 }
3288
3289
3290 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3291 For multi-step conversions store the resulting vectors and call the function
3292 recursively. */
3293
3294 static void
3295 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3296 int multi_step_cvt, gimple stmt,
3297 vec<tree> vec_dsts,
3298 gimple_stmt_iterator *gsi,
3299 slp_tree slp_node, enum tree_code code,
3300 stmt_vec_info *prev_stmt_info)
3301 {
3302 unsigned int i;
3303 tree vop0, vop1, new_tmp, vec_dest;
3304 gimple new_stmt;
3305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3306
3307 vec_dest = vec_dsts.pop ();
3308
3309 for (i = 0; i < vec_oprnds->length (); i += 2)
3310 {
3311 /* Create demotion operation. */
3312 vop0 = (*vec_oprnds)[i];
3313 vop1 = (*vec_oprnds)[i + 1];
3314 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3315 new_tmp = make_ssa_name (vec_dest, new_stmt);
3316 gimple_assign_set_lhs (new_stmt, new_tmp);
3317 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3318
3319 if (multi_step_cvt)
3320 /* Store the resulting vector for next recursive call. */
3321 (*vec_oprnds)[i/2] = new_tmp;
3322 else
3323 {
3324 /* This is the last step of the conversion sequence. Store the
3325 vectors in SLP_NODE or in vector info of the scalar statement
3326 (or in STMT_VINFO_RELATED_STMT chain). */
3327 if (slp_node)
3328 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3329 else
3330 {
3331 if (!*prev_stmt_info)
3332 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3333 else
3334 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3335
3336 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3337 }
3338 }
3339 }
3340
3341 /* For multi-step demotion operations we first generate demotion operations
3342 from the source type to the intermediate types, and then combine the
3343 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3344 type. */
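/* For instance (a sketch, assuming 128-bit vector modes), demoting
   V4SI to V16QI goes V4SI -> V8HI -> V16QI: the loop above packs pairs
   of int vectors into short vectors, and the recursive call below packs
   pairs of those into char vectors with VEC_PACK_TRUNC_EXPR.  */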
3345 if (multi_step_cvt)
3346 {
3347 /* At each level of recursion we have half of the operands we had at the
3348 previous level. */
3349 vec_oprnds->truncate ((i+1)/2);
3350 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3351 stmt, vec_dsts, gsi, slp_node,
3352 VEC_PACK_TRUNC_EXPR,
3353 prev_stmt_info);
3354 }
3355
3356 vec_dsts.quick_push (vec_dest);
3357 }
3358
3359
3360 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3361 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3362 the resulting vectors and call the function recursively. */
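
/* Each input vector yields two output vectors: CODE1 and CODE2 are
   typically the low-half and high-half widening codes (or DECL1/DECL2
   target builtins when CODE1/CODE2 are CALL_EXPRs), so for one operand
   pair the sketch is
     new_tmp1 = CODE1 <vop0, vop1>;
     new_tmp2 = CODE2 <vop0, vop1>;
   with both results pushed for the next conversion step.  */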
3363
3364 static void
3365 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3366 vec<tree> *vec_oprnds1,
3367 gimple stmt, tree vec_dest,
3368 gimple_stmt_iterator *gsi,
3369 enum tree_code code1,
3370 enum tree_code code2, tree decl1,
3371 tree decl2, int op_type)
3372 {
3373 int i;
3374 tree vop0, vop1, new_tmp1, new_tmp2;
3375 gimple new_stmt1, new_stmt2;
3376 vec<tree> vec_tmp = vNULL;
3377
3378 vec_tmp.create (vec_oprnds0->length () * 2);
3379 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3380 {
3381 if (op_type == binary_op)
3382 vop1 = (*vec_oprnds1)[i];
3383 else
3384 vop1 = NULL_TREE;
3385
3386 /* Generate the two halves of promotion operation. */
3387 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3388 op_type, vec_dest, gsi, stmt);
3389 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3390 op_type, vec_dest, gsi, stmt);
3391 if (is_gimple_call (new_stmt1))
3392 {
3393 new_tmp1 = gimple_call_lhs (new_stmt1);
3394 new_tmp2 = gimple_call_lhs (new_stmt2);
3395 }
3396 else
3397 {
3398 new_tmp1 = gimple_assign_lhs (new_stmt1);
3399 new_tmp2 = gimple_assign_lhs (new_stmt2);
3400 }
3401
3402 /* Store the results for the next step. */
3403 vec_tmp.quick_push (new_tmp1);
3404 vec_tmp.quick_push (new_tmp2);
3405 }
3406
3407 vec_oprnds0->release ();
3408 *vec_oprnds0 = vec_tmp;
3409 }
3410
3411
3412 /* Check if STMT performs a conversion operation that can be vectorized.
3413 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3414 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3415 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3416
3417 static bool
3418 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3419 gimple *vec_stmt, slp_tree slp_node)
3420 {
3421 tree vec_dest;
3422 tree scalar_dest;
3423 tree op0, op1 = NULL_TREE;
3424 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3425 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3426 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3427 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3428 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3429 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3430 tree new_temp;
3431 tree def;
3432 gimple def_stmt;
3433 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3434 gimple new_stmt = NULL;
3435 stmt_vec_info prev_stmt_info;
3436 int nunits_in;
3437 int nunits_out;
3438 tree vectype_out, vectype_in;
3439 int ncopies, i, j;
3440 tree lhs_type, rhs_type;
3441 enum { NARROW, NONE, WIDEN } modifier;
3442 vec<tree> vec_oprnds0 = vNULL;
3443 vec<tree> vec_oprnds1 = vNULL;
3444 tree vop0;
3445 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3446 int multi_step_cvt = 0;
3447 vec<tree> vec_dsts = vNULL;
3448 vec<tree> interm_types = vNULL;
3449 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3450 int op_type;
3451 machine_mode rhs_mode;
3452 unsigned short fltsz;
3453
3454 /* Is STMT a vectorizable conversion? */
3455
3456 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3457 return false;
3458
3459 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3460 return false;
3461
3462 if (!is_gimple_assign (stmt))
3463 return false;
3464
3465 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3466 return false;
3467
3468 code = gimple_assign_rhs_code (stmt);
3469 if (!CONVERT_EXPR_CODE_P (code)
3470 && code != FIX_TRUNC_EXPR
3471 && code != FLOAT_EXPR
3472 && code != WIDEN_MULT_EXPR
3473 && code != WIDEN_LSHIFT_EXPR)
3474 return false;
3475
3476 op_type = TREE_CODE_LENGTH (code);
3477
3478 /* Check types of lhs and rhs. */
3479 scalar_dest = gimple_assign_lhs (stmt);
3480 lhs_type = TREE_TYPE (scalar_dest);
3481 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3482
3483 op0 = gimple_assign_rhs1 (stmt);
3484 rhs_type = TREE_TYPE (op0);
3485
3486 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3487 && !((INTEGRAL_TYPE_P (lhs_type)
3488 && INTEGRAL_TYPE_P (rhs_type))
3489 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3490 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3491 return false;
3492
3493 if ((INTEGRAL_TYPE_P (lhs_type)
3494 && (TYPE_PRECISION (lhs_type)
3495 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3496 || (INTEGRAL_TYPE_P (rhs_type)
3497 && (TYPE_PRECISION (rhs_type)
3498 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3499 {
3500 if (dump_enabled_p ())
3501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3502 "type conversion to/from bit-precision unsupported."
3503 "\n");
3504 return false;
3505 }
3506
3507 /* Check the operands of the operation. */
3508 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3509 &def_stmt, &def, &dt[0], &vectype_in))
3510 {
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3513 "use not simple.\n");
3514 return false;
3515 }
3516 if (op_type == binary_op)
3517 {
3518 bool ok;
3519
3520 op1 = gimple_assign_rhs2 (stmt);
3521 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3522 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3523 OP1. */
3524 if (CONSTANT_CLASS_P (op0))
3525 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3526 &def_stmt, &def, &dt[1], &vectype_in);
3527 else
3528 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3529 &def, &dt[1]);
3530
3531 if (!ok)
3532 {
3533 if (dump_enabled_p ())
3534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3535 "use not simple.\n");
3536 return false;
3537 }
3538 }
3539
3540 /* If op0 is an external or constant def, use a vector type of
3541 the same size as the output vector type. */
3542 if (!vectype_in)
3543 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3544 if (vec_stmt)
3545 gcc_assert (vectype_in);
3546 if (!vectype_in)
3547 {
3548 if (dump_enabled_p ())
3549 {
3550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3551 "no vectype for scalar type ");
3552 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3553 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3554 }
3555
3556 return false;
3557 }
3558
3559 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3560 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3561 if (nunits_in < nunits_out)
3562 modifier = NARROW;
3563 else if (nunits_out == nunits_in)
3564 modifier = NONE;
3565 else
3566 modifier = WIDEN;
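
/* For example, assuming 128-bit vector modes: a V4SI -> V8HI conversion
   (int to short) has nunits_in == 4 < nunits_out == 8 and is classified
   as NARROW (demotion), while V8HI -> V4SI (short to int) is a WIDEN
   (promotion).  */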
3567
3568 /* Multiple types in SLP are handled by creating the appropriate number of
3569 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3570 case of SLP. */
3571 if (slp_node || PURE_SLP_STMT (stmt_info))
3572 ncopies = 1;
3573 else if (modifier == NARROW)
3574 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3575 else
3576 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3577
3578 /* Sanity check: make sure that at least one copy of the vectorized stmt
3579 needs to be generated. */
3580 gcc_assert (ncopies >= 1);
3581
3582 /* Supportable by target? */
3583 switch (modifier)
3584 {
3585 case NONE:
3586 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3587 return false;
3588 if (supportable_convert_operation (code, vectype_out, vectype_in,
3589 &decl1, &code1))
3590 break;
3591 /* FALLTHRU */
3592 unsupported:
3593 if (dump_enabled_p ())
3594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3595 "conversion not supported by target.\n");
3596 return false;
3597
3598 case WIDEN:
3599 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3600 &code1, &code2, &multi_step_cvt,
3601 &interm_types))
3602 {
3603 /* A binary widening operation can only be supported directly by the
3604 architecture. */
3605 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3606 break;
3607 }
3608
3609 if (code != FLOAT_EXPR
3610 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3611 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3612 goto unsupported;
3613
3614 rhs_mode = TYPE_MODE (rhs_type);
3615 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3616 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3617 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3618 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3619 {
3620 cvt_type
3621 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3622 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3623 if (cvt_type == NULL_TREE)
3624 goto unsupported;
3625
3626 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3627 {
3628 if (!supportable_convert_operation (code, vectype_out,
3629 cvt_type, &decl1, &codecvt1))
3630 goto unsupported;
3631 }
3632 else if (!supportable_widening_operation (code, stmt, vectype_out,
3633 cvt_type, &codecvt1,
3634 &codecvt2, &multi_step_cvt,
3635 &interm_types))
3636 continue;
3637 else
3638 gcc_assert (multi_step_cvt == 0);
3639
3640 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3641 vectype_in, &code1, &code2,
3642 &multi_step_cvt, &interm_types))
3643 break;
3644 }
3645
3646 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3647 goto unsupported;
3648
3649 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3650 codecvt2 = ERROR_MARK;
3651 else
3652 {
3653 multi_step_cvt++;
3654 interm_types.safe_push (cvt_type);
3655 cvt_type = NULL_TREE;
3656 }
3657 break;
3658
3659 case NARROW:
3660 gcc_assert (op_type == unary_op);
3661 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3662 &code1, &multi_step_cvt,
3663 &interm_types))
3664 break;
3665
3666 if (code != FIX_TRUNC_EXPR
3667 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3668 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3669 goto unsupported;
3670
3671 rhs_mode = TYPE_MODE (rhs_type);
3672 cvt_type
3673 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3674 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3675 if (cvt_type == NULL_TREE)
3676 goto unsupported;
3677 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3678 &decl1, &codecvt1))
3679 goto unsupported;
3680 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3681 &code1, &multi_step_cvt,
3682 &interm_types))
3683 break;
3684 goto unsupported;
3685
3686 default:
3687 gcc_unreachable ();
3688 }
3689
3690 if (!vec_stmt) /* transformation not required. */
3691 {
3692 if (dump_enabled_p ())
3693 dump_printf_loc (MSG_NOTE, vect_location,
3694 "=== vectorizable_conversion ===\n");
3695 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3696 {
3697 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3698 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3699 }
3700 else if (modifier == NARROW)
3701 {
3702 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3703 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3704 }
3705 else
3706 {
3707 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3708 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3709 }
3710 interm_types.release ();
3711 return true;
3712 }
3713
3714 /** Transform. **/
3715 if (dump_enabled_p ())
3716 dump_printf_loc (MSG_NOTE, vect_location,
3717 "transform conversion. ncopies = %d.\n", ncopies);
3718
3719 if (op_type == binary_op)
3720 {
3721 if (CONSTANT_CLASS_P (op0))
3722 op0 = fold_convert (TREE_TYPE (op1), op0);
3723 else if (CONSTANT_CLASS_P (op1))
3724 op1 = fold_convert (TREE_TYPE (op0), op1);
3725 }
3726
3727 /* In case of multi-step conversion, we first generate conversion operations
3728 to the intermediate types, and then from those types to the final one.
3729 We create vector destinations for the intermediate type (TYPES) received
3730 from supportable_*_operation, and store them in the correct order
3731 for future use in vect_create_vectorized_*_stmts (). */
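/* For instance, a widening conversion from char to int on a target without
   a direct char -> int vector conversion may be done in two steps,
   char -> short -> int; INTERM_TYPES then holds the short vector type and a
   separate destination variable is created for it (illustrative; the actual
   steps depend on the target).  */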
3732 vec_dsts.create (multi_step_cvt + 1);
3733 vec_dest = vect_create_destination_var (scalar_dest,
3734 (cvt_type && modifier == WIDEN)
3735 ? cvt_type : vectype_out);
3736 vec_dsts.quick_push (vec_dest);
3737
3738 if (multi_step_cvt)
3739 {
3740 for (i = interm_types.length () - 1;
3741 interm_types.iterate (i, &intermediate_type); i--)
3742 {
3743 vec_dest = vect_create_destination_var (scalar_dest,
3744 intermediate_type);
3745 vec_dsts.quick_push (vec_dest);
3746 }
3747 }
3748
3749 if (cvt_type)
3750 vec_dest = vect_create_destination_var (scalar_dest,
3751 modifier == WIDEN
3752 ? vectype_out : cvt_type);
3753
3754 if (!slp_node)
3755 {
3756 if (modifier == WIDEN)
3757 {
3758 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3759 if (op_type == binary_op)
3760 vec_oprnds1.create (1);
3761 }
3762 else if (modifier == NARROW)
3763 vec_oprnds0.create (
3764 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3765 }
3766 else if (code == WIDEN_LSHIFT_EXPR)
3767 vec_oprnds1.create (slp_node->vec_stmts_size);
3768
3769 last_oprnd = op0;
3770 prev_stmt_info = NULL;
3771 switch (modifier)
3772 {
3773 case NONE:
3774 for (j = 0; j < ncopies; j++)
3775 {
3776 if (j == 0)
3777 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3778 -1);
3779 else
3780 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3781
3782 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3783 {
3784 /* Arguments are ready, create the new vector stmt. */
3785 if (code1 == CALL_EXPR)
3786 {
3787 new_stmt = gimple_build_call (decl1, 1, vop0);
3788 new_temp = make_ssa_name (vec_dest, new_stmt);
3789 gimple_call_set_lhs (new_stmt, new_temp);
3790 }
3791 else
3792 {
3793 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3794 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3795 vop0, NULL);
3796 new_temp = make_ssa_name (vec_dest, new_stmt);
3797 gimple_assign_set_lhs (new_stmt, new_temp);
3798 }
3799
3800 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3801 if (slp_node)
3802 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3803 }
3804
3805 if (j == 0)
3806 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3807 else
3808 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3809 prev_stmt_info = vinfo_for_stmt (new_stmt);
3810 }
3811 break;
3812
3813 case WIDEN:
3814 /* In case the vectorization factor (VF) is bigger than the number
3815 of elements that we can fit in a vectype (nunits), we have to
3816 generate more than one vector stmt - i.e. - we need to "unroll"
3817 the vector stmt by a factor VF/nunits. */
3818 for (j = 0; j < ncopies; j++)
3819 {
3820 /* Handle uses. */
3821 if (j == 0)
3822 {
3823 if (slp_node)
3824 {
3825 if (code == WIDEN_LSHIFT_EXPR)
3826 {
3827 unsigned int k;
3828
3829 vec_oprnd1 = op1;
3830 /* Store vec_oprnd1 for every vector stmt to be created
3831 for SLP_NODE. We check during the analysis that all
3832 the shift arguments are the same. */
3833 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3834 vec_oprnds1.quick_push (vec_oprnd1);
3835
3836 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3837 slp_node, -1);
3838 }
3839 else
3840 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3841 &vec_oprnds1, slp_node, -1);
3842 }
3843 else
3844 {
3845 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3846 vec_oprnds0.quick_push (vec_oprnd0);
3847 if (op_type == binary_op)
3848 {
3849 if (code == WIDEN_LSHIFT_EXPR)
3850 vec_oprnd1 = op1;
3851 else
3852 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3853 NULL);
3854 vec_oprnds1.quick_push (vec_oprnd1);
3855 }
3856 }
3857 }
3858 else
3859 {
3860 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3861 vec_oprnds0.truncate (0);
3862 vec_oprnds0.quick_push (vec_oprnd0);
3863 if (op_type == binary_op)
3864 {
3865 if (code == WIDEN_LSHIFT_EXPR)
3866 vec_oprnd1 = op1;
3867 else
3868 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3869 vec_oprnd1);
3870 vec_oprnds1.truncate (0);
3871 vec_oprnds1.quick_push (vec_oprnd1);
3872 }
3873 }
3874
3875 /* Arguments are ready. Create the new vector stmts. */
3876 for (i = multi_step_cvt; i >= 0; i--)
3877 {
3878 tree this_dest = vec_dsts[i];
3879 enum tree_code c1 = code1, c2 = code2;
3880 if (i == 0 && codecvt2 != ERROR_MARK)
3881 {
3882 c1 = codecvt1;
3883 c2 = codecvt2;
3884 }
3885 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3886 &vec_oprnds1,
3887 stmt, this_dest, gsi,
3888 c1, c2, decl1, decl2,
3889 op_type);
3890 }
3891
3892 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3893 {
3894 if (cvt_type)
3895 {
3896 if (codecvt1 == CALL_EXPR)
3897 {
3898 new_stmt = gimple_build_call (decl1, 1, vop0);
3899 new_temp = make_ssa_name (vec_dest, new_stmt);
3900 gimple_call_set_lhs (new_stmt, new_temp);
3901 }
3902 else
3903 {
3904 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3905 new_temp = make_ssa_name (vec_dest, NULL);
3906 new_stmt = gimple_build_assign_with_ops (codecvt1,
3907 new_temp,
3908 vop0, NULL);
3909 }
3910
3911 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3912 }
3913 else
3914 new_stmt = SSA_NAME_DEF_STMT (vop0);
3915
3916 if (slp_node)
3917 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3918 else
3919 {
3920 if (!prev_stmt_info)
3921 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3922 else
3923 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3924 prev_stmt_info = vinfo_for_stmt (new_stmt);
3925 }
3926 }
3927 }
3928
3929 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3930 break;
3931
3932 case NARROW:
3933 /* In case the vectorization factor (VF) is bigger than the number
3934 of elements that we can fit in a vectype (nunits), we have to
3935 generate more than one vector stmt - i.e. - we need to "unroll"
3936 the vector stmt by a factor VF/nunits. */
3937 for (j = 0; j < ncopies; j++)
3938 {
3939 /* Handle uses. */
3940 if (slp_node)
3941 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3942 slp_node, -1);
3943 else
3944 {
3945 vec_oprnds0.truncate (0);
3946 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3947 vect_pow2 (multi_step_cvt) - 1);
3948 }
3949
3950 /* Arguments are ready. Create the new vector stmts. */
3951 if (cvt_type)
3952 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3953 {
3954 if (codecvt1 == CALL_EXPR)
3955 {
3956 new_stmt = gimple_build_call (decl1, 1, vop0);
3957 new_temp = make_ssa_name (vec_dest, new_stmt);
3958 gimple_call_set_lhs (new_stmt, new_temp);
3959 }
3960 else
3961 {
3962 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3963 new_temp = make_ssa_name (vec_dest, NULL);
3964 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3965 vop0, NULL);
3966 }
3967
3968 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3969 vec_oprnds0[i] = new_temp;
3970 }
3971
3972 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3973 stmt, vec_dsts, gsi,
3974 slp_node, code1,
3975 &prev_stmt_info);
3976 }
3977
3978 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3979 break;
3980 }
3981
3982 vec_oprnds0.release ();
3983 vec_oprnds1.release ();
3984 vec_dsts.release ();
3985 interm_types.release ();
3986
3987 return true;
3988 }
3989
3990
3991 /* Function vectorizable_assignment.
3992
3993 Check if STMT performs an assignment (copy) that can be vectorized.
3994 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3995 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3996 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3997
3998 static bool
3999 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4000 gimple *vec_stmt, slp_tree slp_node)
4001 {
4002 tree vec_dest;
4003 tree scalar_dest;
4004 tree op;
4005 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4006 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4007 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4008 tree new_temp;
4009 tree def;
4010 gimple def_stmt;
4011 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4012 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4013 int ncopies;
4014 int i, j;
4015 vec<tree> vec_oprnds = vNULL;
4016 tree vop;
4017 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4018 gimple new_stmt = NULL;
4019 stmt_vec_info prev_stmt_info = NULL;
4020 enum tree_code code;
4021 tree vectype_in;
4022
4023 /* Multiple types in SLP are handled by creating the appropriate number of
4024 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4025 case of SLP. */
4026 if (slp_node || PURE_SLP_STMT (stmt_info))
4027 ncopies = 1;
4028 else
4029 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4030
4031 gcc_assert (ncopies >= 1);
4032
4033 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4034 return false;
4035
4036 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4037 return false;
4038
4039 /* Is vectorizable assignment? */
4040 if (!is_gimple_assign (stmt))
4041 return false;
4042
4043 scalar_dest = gimple_assign_lhs (stmt);
4044 if (TREE_CODE (scalar_dest) != SSA_NAME)
4045 return false;
4046
4047 code = gimple_assign_rhs_code (stmt);
4048 if (gimple_assign_single_p (stmt)
4049 || code == PAREN_EXPR
4050 || CONVERT_EXPR_CODE_P (code))
4051 op = gimple_assign_rhs1 (stmt);
4052 else
4053 return false;
4054
4055 if (code == VIEW_CONVERT_EXPR)
4056 op = TREE_OPERAND (op, 0);
4057
4058 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4059 &def_stmt, &def, &dt[0], &vectype_in))
4060 {
4061 if (dump_enabled_p ())
4062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4063 "use not simple.\n");
4064 return false;
4065 }
4066
4067 /* We can handle NOP_EXPR conversions that do not change the number
4068 of elements or the vector size. */
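/* E.g. a cast between int and unsigned int keeps both the element count and
   the vector size, so it is handled here and vectorized below as a
   VIEW_CONVERT_EXPR of the vector operand.  */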
4069 if ((CONVERT_EXPR_CODE_P (code)
4070 || code == VIEW_CONVERT_EXPR)
4071 && (!vectype_in
4072 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4073 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4074 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4075 return false;
4076
4077 /* We do not handle bit-precision changes. */
4078 if ((CONVERT_EXPR_CODE_P (code)
4079 || code == VIEW_CONVERT_EXPR)
4080 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4081 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4082 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4083 || ((TYPE_PRECISION (TREE_TYPE (op))
4084 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4085 /* But a conversion that does not change the bit-pattern is ok. */
4086 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4087 > TYPE_PRECISION (TREE_TYPE (op)))
4088 && TYPE_UNSIGNED (TREE_TYPE (op))))
4089 {
4090 if (dump_enabled_p ())
4091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4092 "type conversion to/from bit-precision "
4093 "unsupported.\n");
4094 return false;
4095 }
4096
4097 if (!vec_stmt) /* transformation not required. */
4098 {
4099 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4100 if (dump_enabled_p ())
4101 dump_printf_loc (MSG_NOTE, vect_location,
4102 "=== vectorizable_assignment ===\n");
4103 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4104 return true;
4105 }
4106
4107 /** Transform. **/
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4110
4111 /* Handle def. */
4112 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4113
4114 /* Handle use. */
4115 for (j = 0; j < ncopies; j++)
4116 {
4117 /* Handle uses. */
4118 if (j == 0)
4119 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4120 else
4121 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4122
4123 /* Arguments are ready. Create the new vector stmt. */
4124 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4125 {
4126 if (CONVERT_EXPR_CODE_P (code)
4127 || code == VIEW_CONVERT_EXPR)
4128 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4129 new_stmt = gimple_build_assign (vec_dest, vop);
4130 new_temp = make_ssa_name (vec_dest, new_stmt);
4131 gimple_assign_set_lhs (new_stmt, new_temp);
4132 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4133 if (slp_node)
4134 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4135 }
4136
4137 if (slp_node)
4138 continue;
4139
4140 if (j == 0)
4141 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4142 else
4143 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4144
4145 prev_stmt_info = vinfo_for_stmt (new_stmt);
4146 }
4147
4148 vec_oprnds.release ();
4149 return true;
4150 }
4151
4152
4153 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4154 either as shift by a scalar or by a vector. */
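/* For example, if the target only provides a vector-shift-by-scalar pattern
   for the vector type of SCALAR_TYPE, the optab_scalar check below succeeds
   and the function returns true without consulting the vector-by-vector
   optab.  */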
4155
4156 bool
4157 vect_supportable_shift (enum tree_code code, tree scalar_type)
4158 {
4159
4160 machine_mode vec_mode;
4161 optab optab;
4162 int icode;
4163 tree vectype;
4164
4165 vectype = get_vectype_for_scalar_type (scalar_type);
4166 if (!vectype)
4167 return false;
4168
4169 optab = optab_for_tree_code (code, vectype, optab_scalar);
4170 if (!optab
4171 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4172 {
4173 optab = optab_for_tree_code (code, vectype, optab_vector);
4174 if (!optab
4175 || (optab_handler (optab, TYPE_MODE (vectype))
4176 == CODE_FOR_nothing))
4177 return false;
4178 }
4179
4180 vec_mode = TYPE_MODE (vectype);
4181 icode = (int) optab_handler (optab, vec_mode);
4182 if (icode == CODE_FOR_nothing)
4183 return false;
4184
4185 return true;
4186 }
4187
4188
4189 /* Function vectorizable_shift.
4190
4191 Check if STMT performs a shift operation that can be vectorized.
4192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4195
4196 static bool
4197 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4198 gimple *vec_stmt, slp_tree slp_node)
4199 {
4200 tree vec_dest;
4201 tree scalar_dest;
4202 tree op0, op1 = NULL;
4203 tree vec_oprnd1 = NULL_TREE;
4204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4205 tree vectype;
4206 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4207 enum tree_code code;
4208 machine_mode vec_mode;
4209 tree new_temp;
4210 optab optab;
4211 int icode;
4212 machine_mode optab_op2_mode;
4213 tree def;
4214 gimple def_stmt;
4215 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4216 gimple new_stmt = NULL;
4217 stmt_vec_info prev_stmt_info;
4218 int nunits_in;
4219 int nunits_out;
4220 tree vectype_out;
4221 tree op1_vectype;
4222 int ncopies;
4223 int j, i;
4224 vec<tree> vec_oprnds0 = vNULL;
4225 vec<tree> vec_oprnds1 = vNULL;
4226 tree vop0, vop1;
4227 unsigned int k;
4228 bool scalar_shift_arg = true;
4229 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4230 int vf;
4231
4232 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4233 return false;
4234
4235 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4236 return false;
4237
4238 /* Is STMT a vectorizable binary/unary operation? */
4239 if (!is_gimple_assign (stmt))
4240 return false;
4241
4242 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4243 return false;
4244
4245 code = gimple_assign_rhs_code (stmt);
4246
4247 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4248 || code == RROTATE_EXPR))
4249 return false;
4250
4251 scalar_dest = gimple_assign_lhs (stmt);
4252 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4253 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4254 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4255 {
4256 if (dump_enabled_p ())
4257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4258 "bit-precision shifts not supported.\n");
4259 return false;
4260 }
4261
4262 op0 = gimple_assign_rhs1 (stmt);
4263 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4264 &def_stmt, &def, &dt[0], &vectype))
4265 {
4266 if (dump_enabled_p ())
4267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4268 "use not simple.\n");
4269 return false;
4270 }
4271 /* If op0 is an external or constant def, use a vector type with
4272 the same size as the output vector type. */
4273 if (!vectype)
4274 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4275 if (vec_stmt)
4276 gcc_assert (vectype);
4277 if (!vectype)
4278 {
4279 if (dump_enabled_p ())
4280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4281 "no vectype for scalar type\n");
4282 return false;
4283 }
4284
4285 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4286 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4287 if (nunits_out != nunits_in)
4288 return false;
4289
4290 op1 = gimple_assign_rhs2 (stmt);
4291 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4292 &def, &dt[1], &op1_vectype))
4293 {
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4296 "use not simple.\n");
4297 return false;
4298 }
4299
4300 if (loop_vinfo)
4301 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4302 else
4303 vf = 1;
4304
4305 /* Multiple types in SLP are handled by creating the appropriate number of
4306 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4307 case of SLP. */
4308 if (slp_node || PURE_SLP_STMT (stmt_info))
4309 ncopies = 1;
4310 else
4311 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4312
4313 gcc_assert (ncopies >= 1);
4314
4315 /* Determine whether the shift amount is a vector or a scalar. If the
4316 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4317
4318 if (dt[1] == vect_internal_def && !slp_node)
4319 scalar_shift_arg = false;
4320 else if (dt[1] == vect_constant_def
4321 || dt[1] == vect_external_def
4322 || dt[1] == vect_internal_def)
4323 {
4324 /* In SLP, we need to check whether the shift count is the same;
4325 in loops, if it is a constant or invariant, it is always
4326 a scalar shift. */
4327 if (slp_node)
4328 {
4329 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4330 gimple slpstmt;
4331
4332 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4333 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4334 scalar_shift_arg = false;
4335 }
4336 }
4337 else
4338 {
4339 if (dump_enabled_p ())
4340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4341 "operand mode requires invariant argument.\n");
4342 return false;
4343 }
4344
4345 /* Vector shifted by vector. */
4346 if (!scalar_shift_arg)
4347 {
4348 optab = optab_for_tree_code (code, vectype, optab_vector);
4349 if (dump_enabled_p ())
4350 dump_printf_loc (MSG_NOTE, vect_location,
4351 "vector/vector shift/rotate found.\n");
4352
4353 if (!op1_vectype)
4354 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4355 if (op1_vectype == NULL_TREE
4356 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4357 {
4358 if (dump_enabled_p ())
4359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4360 "unusable type for last operand in"
4361 " vector/vector shift/rotate.\n");
4362 return false;
4363 }
4364 }
4365 /* See if the machine has a vector shifted by scalar insn and if not
4366 then see if it has a vector shifted by vector insn. */
4367 else
4368 {
4369 optab = optab_for_tree_code (code, vectype, optab_scalar);
4370 if (optab
4371 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4372 {
4373 if (dump_enabled_p ())
4374 dump_printf_loc (MSG_NOTE, vect_location,
4375 "vector/scalar shift/rotate found.\n");
4376 }
4377 else
4378 {
4379 optab = optab_for_tree_code (code, vectype, optab_vector);
4380 if (optab
4381 && (optab_handler (optab, TYPE_MODE (vectype))
4382 != CODE_FOR_nothing))
4383 {
4384 scalar_shift_arg = false;
4385
4386 if (dump_enabled_p ())
4387 dump_printf_loc (MSG_NOTE, vect_location,
4388 "vector/vector shift/rotate found.\n");
4389
4390 /* Unlike the other binary operators, shifts/rotates have
4391 the rhs being int, instead of the same type as the lhs,
4392 so make sure the scalar is the right type if we are
4393 dealing with vectors of long long/long/short/char. */
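/* E.g. for a V8HI shift the invariant count must have type short rather
   than int, hence the conversions below (illustrative).  */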
4394 if (dt[1] == vect_constant_def)
4395 op1 = fold_convert (TREE_TYPE (vectype), op1);
4396 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4397 TREE_TYPE (op1)))
4398 {
4399 if (slp_node
4400 && TYPE_MODE (TREE_TYPE (vectype))
4401 != TYPE_MODE (TREE_TYPE (op1)))
4402 {
4403 if (dump_enabled_p ())
4404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4405 "unusable type for last operand in"
4406 " vector/vector shift/rotate.\n");
4407 return false;
4408 }
4409 if (vec_stmt && !slp_node)
4410 {
4411 op1 = fold_convert (TREE_TYPE (vectype), op1);
4412 op1 = vect_init_vector (stmt, op1,
4413 TREE_TYPE (vectype), NULL);
4414 }
4415 }
4416 }
4417 }
4418 }
4419
4420 /* Supportable by target? */
4421 if (!optab)
4422 {
4423 if (dump_enabled_p ())
4424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4425 "no optab.\n");
4426 return false;
4427 }
4428 vec_mode = TYPE_MODE (vectype);
4429 icode = (int) optab_handler (optab, vec_mode);
4430 if (icode == CODE_FOR_nothing)
4431 {
4432 if (dump_enabled_p ())
4433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4434 "op not supported by target.\n");
4435 /* Check only during analysis. */
4436 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4437 || (vf < vect_min_worthwhile_factor (code)
4438 && !vec_stmt))
4439 return false;
4440 if (dump_enabled_p ())
4441 dump_printf_loc (MSG_NOTE, vect_location,
4442 "proceeding using word mode.\n");
4443 }
4444
4445 /* Worthwhile without SIMD support? Check only during analysis. */
4446 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4447 && vf < vect_min_worthwhile_factor (code)
4448 && !vec_stmt)
4449 {
4450 if (dump_enabled_p ())
4451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4452 "not worthwhile without SIMD support.\n");
4453 return false;
4454 }
4455
4456 if (!vec_stmt) /* transformation not required. */
4457 {
4458 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4459 if (dump_enabled_p ())
4460 dump_printf_loc (MSG_NOTE, vect_location,
4461 "=== vectorizable_shift ===\n");
4462 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4463 return true;
4464 }
4465
4466 /** Transform. **/
4467
4468 if (dump_enabled_p ())
4469 dump_printf_loc (MSG_NOTE, vect_location,
4470 "transform binary/unary operation.\n");
4471
4472 /* Handle def. */
4473 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4474
4475 prev_stmt_info = NULL;
4476 for (j = 0; j < ncopies; j++)
4477 {
4478 /* Handle uses. */
4479 if (j == 0)
4480 {
4481 if (scalar_shift_arg)
4482 {
4483 /* Vector shl and shr insn patterns can be defined with scalar
4484 operand 2 (shift operand). In this case, use constant or loop
4485 invariant op1 directly, without extending it to vector mode
4486 first. */
4487 optab_op2_mode = insn_data[icode].operand[2].mode;
4488 if (!VECTOR_MODE_P (optab_op2_mode))
4489 {
4490 if (dump_enabled_p ())
4491 dump_printf_loc (MSG_NOTE, vect_location,
4492 "operand 1 using scalar mode.\n");
4493 vec_oprnd1 = op1;
4494 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4495 vec_oprnds1.quick_push (vec_oprnd1);
4496 if (slp_node)
4497 {
4498 /* Store vec_oprnd1 for every vector stmt to be created
4499 for SLP_NODE. We check during the analysis that all
4500 the shift arguments are the same.
4501 TODO: Allow different constants for different vector
4502 stmts generated for an SLP instance. */
4503 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4504 vec_oprnds1.quick_push (vec_oprnd1);
4505 }
4506 }
4507 }
4508
4509 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4510 (a special case for certain kinds of vector shifts); otherwise,
4511 operand 1 should be of a vector type (the usual case). */
4512 if (vec_oprnd1)
4513 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4514 slp_node, -1);
4515 else
4516 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4517 slp_node, -1);
4518 }
4519 else
4520 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4521
4522 /* Arguments are ready. Create the new vector stmt. */
4523 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4524 {
4525 vop1 = vec_oprnds1[i];
4526 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4527 new_temp = make_ssa_name (vec_dest, new_stmt);
4528 gimple_assign_set_lhs (new_stmt, new_temp);
4529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4530 if (slp_node)
4531 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4532 }
4533
4534 if (slp_node)
4535 continue;
4536
4537 if (j == 0)
4538 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4539 else
4540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4541 prev_stmt_info = vinfo_for_stmt (new_stmt);
4542 }
4543
4544 vec_oprnds0.release ();
4545 vec_oprnds1.release ();
4546
4547 return true;
4548 }
4549
4550
4551 /* Function vectorizable_operation.
4552
4553 Check if STMT performs a binary, unary or ternary operation that can
4554 be vectorized.
4555 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4556 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4557 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4558
4559 static bool
4560 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4561 gimple *vec_stmt, slp_tree slp_node)
4562 {
4563 tree vec_dest;
4564 tree scalar_dest;
4565 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4567 tree vectype;
4568 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4569 enum tree_code code;
4570 machine_mode vec_mode;
4571 tree new_temp;
4572 int op_type;
4573 optab optab;
4574 int icode;
4575 tree def;
4576 gimple def_stmt;
4577 enum vect_def_type dt[3]
4578 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4579 gimple new_stmt = NULL;
4580 stmt_vec_info prev_stmt_info;
4581 int nunits_in;
4582 int nunits_out;
4583 tree vectype_out;
4584 int ncopies;
4585 int j, i;
4586 vec<tree> vec_oprnds0 = vNULL;
4587 vec<tree> vec_oprnds1 = vNULL;
4588 vec<tree> vec_oprnds2 = vNULL;
4589 tree vop0, vop1, vop2;
4590 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4591 int vf;
4592
4593 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4594 return false;
4595
4596 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4597 return false;
4598
4599 /* Is STMT a vectorizable binary/unary operation? */
4600 if (!is_gimple_assign (stmt))
4601 return false;
4602
4603 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4604 return false;
4605
4606 code = gimple_assign_rhs_code (stmt);
4607
4608 /* For pointer addition, we should use the normal plus for
4609 the vector addition. */
4610 if (code == POINTER_PLUS_EXPR)
4611 code = PLUS_EXPR;
4612
4613 /* Support only unary, binary, or ternary operations. */
4614 op_type = TREE_CODE_LENGTH (code);
4615 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4616 {
4617 if (dump_enabled_p ())
4618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4619 "num. args = %d (not unary/binary/ternary op).\n",
4620 op_type);
4621 return false;
4622 }
4623
4624 scalar_dest = gimple_assign_lhs (stmt);
4625 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4626
4627 /* Most operations cannot handle bit-precision types without extra
4628 truncations. */
4629 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4630 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4631 /* Exceptions are the bitwise binary operations. */
4632 && code != BIT_IOR_EXPR
4633 && code != BIT_XOR_EXPR
4634 && code != BIT_AND_EXPR)
4635 {
4636 if (dump_enabled_p ())
4637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4638 "bit-precision arithmetic not supported.\n");
4639 return false;
4640 }
4641
4642 op0 = gimple_assign_rhs1 (stmt);
4643 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4644 &def_stmt, &def, &dt[0], &vectype))
4645 {
4646 if (dump_enabled_p ())
4647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4648 "use not simple.\n");
4649 return false;
4650 }
4651 /* If op0 is an external or constant def, use a vector type with
4652 the same size as the output vector type. */
4653 if (!vectype)
4654 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4655 if (vec_stmt)
4656 gcc_assert (vectype);
4657 if (!vectype)
4658 {
4659 if (dump_enabled_p ())
4660 {
4661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4662 "no vectype for scalar type ");
4663 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4664 TREE_TYPE (op0));
4665 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4666 }
4667
4668 return false;
4669 }
4670
4671 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4672 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4673 if (nunits_out != nunits_in)
4674 return false;
4675
4676 if (op_type == binary_op || op_type == ternary_op)
4677 {
4678 op1 = gimple_assign_rhs2 (stmt);
4679 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4680 &def, &dt[1]))
4681 {
4682 if (dump_enabled_p ())
4683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4684 "use not simple.\n");
4685 return false;
4686 }
4687 }
4688 if (op_type == ternary_op)
4689 {
4690 op2 = gimple_assign_rhs3 (stmt);
4691 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4692 &def, &dt[2]))
4693 {
4694 if (dump_enabled_p ())
4695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4696 "use not simple.\n");
4697 return false;
4698 }
4699 }
4700
4701 if (loop_vinfo)
4702 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4703 else
4704 vf = 1;
4705
4706 /* Multiple types in SLP are handled by creating the appropriate number of
4707 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4708 case of SLP. */
4709 if (slp_node || PURE_SLP_STMT (stmt_info))
4710 ncopies = 1;
4711 else
4712 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4713
4714 gcc_assert (ncopies >= 1);
4715
4716 /* Shifts are handled in vectorizable_shift (). */
4717 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4718 || code == RROTATE_EXPR)
4719 return false;
4720
4721 /* Supportable by target? */
4722
4723 vec_mode = TYPE_MODE (vectype);
4724 if (code == MULT_HIGHPART_EXPR)
4725 {
4726 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4727 icode = LAST_INSN_CODE;
4728 else
4729 icode = CODE_FOR_nothing;
4730 }
4731 else
4732 {
4733 optab = optab_for_tree_code (code, vectype, optab_default);
4734 if (!optab)
4735 {
4736 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4738 "no optab.\n");
4739 return false;
4740 }
4741 icode = (int) optab_handler (optab, vec_mode);
4742 }
4743
4744 if (icode == CODE_FOR_nothing)
4745 {
4746 if (dump_enabled_p ())
4747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4748 "op not supported by target.\n");
4749 /* Check only during analysis. */
4750 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4751 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4752 return false;
4753 if (dump_enabled_p ())
4754 dump_printf_loc (MSG_NOTE, vect_location,
4755 "proceeding using word mode.\n");
4756 }
4757
4758 /* Worthwhile without SIMD support? Check only during analysis. */
4759 if (!VECTOR_MODE_P (vec_mode)
4760 && !vec_stmt
4761 && vf < vect_min_worthwhile_factor (code))
4762 {
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4765 "not worthwhile without SIMD support.\n");
4766 return false;
4767 }
4768
4769 if (!vec_stmt) /* transformation not required. */
4770 {
4771 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4772 if (dump_enabled_p ())
4773 dump_printf_loc (MSG_NOTE, vect_location,
4774 "=== vectorizable_operation ===\n");
4775 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4776 return true;
4777 }
4778
4779 /** Transform. **/
4780
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_NOTE, vect_location,
4783 "transform binary/unary operation.\n");
4784
4785 /* Handle def. */
4786 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4787
4788 /* In case the vectorization factor (VF) is bigger than the number
4789 of elements that we can fit in a vectype (nunits), we have to generate
4790 more than one vector stmt - i.e. - we need to "unroll" the
4791 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4792 from one copy of the vector stmt to the next, in the field
4793 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4794 stages to find the correct vector defs to be used when vectorizing
4795 stmts that use the defs of the current stmt. The example below
4796 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4797 we need to create 4 vectorized stmts):
4798
4799 before vectorization:
4800 RELATED_STMT VEC_STMT
4801 S1: x = memref - -
4802 S2: z = x + 1 - -
4803
4804 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4805 there):
4806 RELATED_STMT VEC_STMT
4807 VS1_0: vx0 = memref0 VS1_1 -
4808 VS1_1: vx1 = memref1 VS1_2 -
4809 VS1_2: vx2 = memref2 VS1_3 -
4810 VS1_3: vx3 = memref3 - -
4811 S1: x = load - VS1_0
4812 S2: z = x + 1 - -
4813
4814 step2: vectorize stmt S2 (done here):
4815 To vectorize stmt S2 we first need to find the relevant vector
4816 def for the first operand 'x'. This is, as usual, obtained from
4817 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4818 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4819 relevant vector def 'vx0'. Having found 'vx0' we can generate
4820 the vector stmt VS2_0, and as usual, record it in the
4821 STMT_VINFO_VEC_STMT of stmt S2.
4822 When creating the second copy (VS2_1), we obtain the relevant vector
4823 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4824 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4825 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4826 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4827 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4828 chain of stmts and pointers:
4829 RELATED_STMT VEC_STMT
4830 VS1_0: vx0 = memref0 VS1_1 -
4831 VS1_1: vx1 = memref1 VS1_2 -
4832 VS1_2: vx2 = memref2 VS1_3 -
4833 VS1_3: vx3 = memref3 - -
4834 S1: x = load - VS1_0
4835 VS2_0: vz0 = vx0 + v1 VS2_1 -
4836 VS2_1: vz1 = vx1 + v1 VS2_2 -
4837 VS2_2: vz2 = vx2 + v1 VS2_3 -
4838 VS2_3: vz3 = vx3 + v1 - -
4839 S2: z = x + 1 - VS2_0 */
4840
4841 prev_stmt_info = NULL;
4842 for (j = 0; j < ncopies; j++)
4843 {
4844 /* Handle uses. */
4845 if (j == 0)
4846 {
4847 if (op_type == binary_op || op_type == ternary_op)
4848 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4849 slp_node, -1);
4850 else
4851 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4852 slp_node, -1);
4853 if (op_type == ternary_op)
4854 {
4855 vec_oprnds2.create (1);
4856 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4857 stmt,
4858 NULL));
4859 }
4860 }
4861 else
4862 {
4863 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4864 if (op_type == ternary_op)
4865 {
4866 tree vec_oprnd = vec_oprnds2.pop ();
4867 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4868 vec_oprnd));
4869 }
4870 }
4871
4872 /* Arguments are ready. Create the new vector stmt. */
4873 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4874 {
4875 vop1 = ((op_type == binary_op || op_type == ternary_op)
4876 ? vec_oprnds1[i] : NULL_TREE);
4877 vop2 = ((op_type == ternary_op)
4878 ? vec_oprnds2[i] : NULL_TREE);
4879 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4880 vop0, vop1, vop2);
4881 new_temp = make_ssa_name (vec_dest, new_stmt);
4882 gimple_assign_set_lhs (new_stmt, new_temp);
4883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4884 if (slp_node)
4885 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4886 }
4887
4888 if (slp_node)
4889 continue;
4890
4891 if (j == 0)
4892 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4893 else
4894 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4895 prev_stmt_info = vinfo_for_stmt (new_stmt);
4896 }
4897
4898 vec_oprnds0.release ();
4899 vec_oprnds1.release ();
4900 vec_oprnds2.release ();
4901
4902 return true;
4903 }
4904
4905 /* A helper function to ensure data reference DR's base alignment
4906 for STMT_INFO. */
4907
4908 static void
4909 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4910 {
4911 if (!dr->aux)
4912 return;
4913
4914 if (((dataref_aux *)dr->aux)->base_misaligned)
4915 {
4916 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4917 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4918
4919 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4920 DECL_USER_ALIGN (base_decl) = 1;
4921 ((dataref_aux *)dr->aux)->base_misaligned = false;
4922 }
4923 }
4924
4925
4926 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4927 reversal of the vector elements. If that is impossible to do,
4928 returns NULL. */
4929
4930 static tree
4931 perm_mask_for_reverse (tree vectype)
4932 {
4933 int i, nunits;
4934 unsigned char *sel;
4935
4936 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4937 sel = XALLOCAVEC (unsigned char, nunits);
4938
4939 for (i = 0; i < nunits; ++i)
4940 sel[i] = nunits - 1 - i;
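/* E.g. for a four-element vector SEL is {3, 2, 1, 0}.  */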
4941
4942 return vect_gen_perm_mask (vectype, sel);
4943 }
4944
4945 /* Function vectorizable_store.
4946
4947 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4948 can be vectorized.
4949 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4950 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4951 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4952
4953 static bool
4954 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4955 slp_tree slp_node)
4956 {
4957 tree scalar_dest;
4958 tree data_ref;
4959 tree op;
4960 tree vec_oprnd = NULL_TREE;
4961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4962 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4964 tree elem_type;
4965 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4966 struct loop *loop = NULL;
4967 machine_mode vec_mode;
4968 tree dummy;
4969 enum dr_alignment_support alignment_support_scheme;
4970 tree def;
4971 gimple def_stmt;
4972 enum vect_def_type dt;
4973 stmt_vec_info prev_stmt_info = NULL;
4974 tree dataref_ptr = NULL_TREE;
4975 tree dataref_offset = NULL_TREE;
4976 gimple ptr_incr = NULL;
4977 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4978 int ncopies;
4979 int j;
4980 gimple next_stmt, first_stmt = NULL;
4981 bool grouped_store = false;
4982 bool store_lanes_p = false;
4983 unsigned int group_size, i;
4984 vec<tree> dr_chain = vNULL;
4985 vec<tree> oprnds = vNULL;
4986 vec<tree> result_chain = vNULL;
4987 bool inv_p;
4988 bool negative = false;
4989 tree offset = NULL_TREE;
4990 vec<tree> vec_oprnds = vNULL;
4991 bool slp = (slp_node != NULL);
4992 unsigned int vec_num;
4993 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4994 tree aggr_type;
4995
4996 if (loop_vinfo)
4997 loop = LOOP_VINFO_LOOP (loop_vinfo);
4998
4999 /* Multiple types in SLP are handled by creating the appropriate number of
5000 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5001 case of SLP. */
5002 if (slp || PURE_SLP_STMT (stmt_info))
5003 ncopies = 1;
5004 else
5005 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5006
5007 gcc_assert (ncopies >= 1);
5008
5009 /* FORNOW. This restriction should be relaxed. */
5010 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5011 {
5012 if (dump_enabled_p ())
5013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5014 "multiple types in nested loop.\n");
5015 return false;
5016 }
5017
5018 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5019 return false;
5020
5021 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5022 return false;
5023
5024 /* Is vectorizable store? */
5025
5026 if (!is_gimple_assign (stmt))
5027 return false;
5028
5029 scalar_dest = gimple_assign_lhs (stmt);
5030 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5031 && is_pattern_stmt_p (stmt_info))
5032 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5033 if (TREE_CODE (scalar_dest) != ARRAY_REF
5034 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5035 && TREE_CODE (scalar_dest) != INDIRECT_REF
5036 && TREE_CODE (scalar_dest) != COMPONENT_REF
5037 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5038 && TREE_CODE (scalar_dest) != REALPART_EXPR
5039 && TREE_CODE (scalar_dest) != MEM_REF)
5040 return false;
5041
5042 gcc_assert (gimple_assign_single_p (stmt));
5043 op = gimple_assign_rhs1 (stmt);
5044 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5045 &def, &dt))
5046 {
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5049 "use not simple.\n");
5050 return false;
5051 }
5052
5053 elem_type = TREE_TYPE (vectype);
5054 vec_mode = TYPE_MODE (vectype);
5055
5056 /* FORNOW. In some cases we can vectorize even if the data-type is not
5057 supported (e.g. array initialization with 0). */
5058 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5059 return false;
5060
5061 if (!STMT_VINFO_DATA_REF (stmt_info))
5062 return false;
5063
5064 negative =
5065 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5066 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5067 size_zero_node) < 0;
5068 if (negative && ncopies > 1)
5069 {
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "multiple types with negative step.\n");
5073 return false;
5074 }
5075
5076 if (negative)
5077 {
5078 gcc_assert (!grouped_store);
5079 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5080 if (alignment_support_scheme != dr_aligned
5081 && alignment_support_scheme != dr_unaligned_supported)
5082 {
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5085 "negative step but alignment required.\n");
5086 return false;
5087 }
5088 if (dt != vect_constant_def
5089 && dt != vect_external_def
5090 && !perm_mask_for_reverse (vectype))
5091 {
5092 if (dump_enabled_p ())
5093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5094 "negative step and reversing not supported.\n");
5095 return false;
5096 }
5097 }
5098
5099 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5100 {
5101 grouped_store = true;
5102 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5103 if (!slp && !PURE_SLP_STMT (stmt_info))
5104 {
5105 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5106 if (vect_store_lanes_supported (vectype, group_size))
5107 store_lanes_p = true;
5108 else if (!vect_grouped_store_supported (vectype, group_size))
5109 return false;
5110 }
5111
5112 if (first_stmt == stmt)
5113 {
5114 /* STMT is the leader of the group. Check the operands of all the
5115 stmts of the group. */
5116 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5117 while (next_stmt)
5118 {
5119 gcc_assert (gimple_assign_single_p (next_stmt));
5120 op = gimple_assign_rhs1 (next_stmt);
5121 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5122 &def_stmt, &def, &dt))
5123 {
5124 if (dump_enabled_p ())
5125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5126 "use not simple.\n");
5127 return false;
5128 }
5129 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5130 }
5131 }
5132 }
5133
5134 if (!vec_stmt) /* transformation not required. */
5135 {
5136 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5137 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5138 NULL, NULL, NULL);
5139 return true;
5140 }
5141
5142 /** Transform. **/
5143
5144 ensure_base_align (stmt_info, dr);
5145
5146 if (grouped_store)
5147 {
5148 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5149 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5150
5151 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5152
5153 /* FORNOW */
5154 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5155
5156 /* We vectorize all the stmts of the interleaving group when we
5157 reach the last stmt in the group. */
5158 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5159 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5160 && !slp)
5161 {
5162 *vec_stmt = NULL;
5163 return true;
5164 }
5165
5166 if (slp)
5167 {
5168 grouped_store = false;
5169 /* VEC_NUM is the number of vect stmts to be created for this
5170 group. */
5171 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5172 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5173 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5174 op = gimple_assign_rhs1 (first_stmt);
5175 }
5176 else
5177 /* VEC_NUM is the number of vect stmts to be created for this
5178 group. */
5179 vec_num = group_size;
5180 }
5181 else
5182 {
5183 first_stmt = stmt;
5184 first_dr = dr;
5185 group_size = vec_num = 1;
5186 }
5187
5188 if (dump_enabled_p ())
5189 dump_printf_loc (MSG_NOTE, vect_location,
5190 "transform store. ncopies = %d\n", ncopies);
5191
5192 dr_chain.create (group_size);
5193 oprnds.create (group_size);
5194
5195 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5196 gcc_assert (alignment_support_scheme);
5197 /* Targets with store-lane instructions must not require explicit
5198 realignment. */
5199 gcc_assert (!store_lanes_p
5200 || alignment_support_scheme == dr_aligned
5201 || alignment_support_scheme == dr_unaligned_supported);
5202
5203 if (negative)
5204 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5205
5206 if (store_lanes_p)
5207 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5208 else
5209 aggr_type = vectype;
5210
5211 /* In case the vectorization factor (VF) is bigger than the number
5212 of elements that we can fit in a vectype (nunits), we have to generate
5213 more than one vector stmt - i.e - we need to "unroll" the
5214 vector stmt by a factor VF/nunits. For more details see documentation in
5215 vect_get_vec_def_for_copy_stmt. */
5216
5217 /* In case of interleaving (non-unit grouped access):
5218
5219 S1: &base + 2 = x2
5220 S2: &base = x0
5221 S3: &base + 1 = x1
5222 S4: &base + 3 = x3
5223
5224 We create vectorized stores starting from the base address (the access of the
5225 first stmt in the chain (S2 in the above example)) when the last store stmt
5226 of the chain (S4) is reached:
5227
5228 VS1: &base = vx2
5229 VS2: &base + vec_size*1 = vx0
5230 VS3: &base + vec_size*2 = vx1
5231 VS4: &base + vec_size*3 = vx3
5232
5233 Then permutation statements are generated:
5234
5235 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5236 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5237 ...
5238
5239 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5240 (the order of the data-refs in the output of vect_permute_store_chain
5241 corresponds to the order of scalar stmts in the interleaving chain - see
5242 the documentation of vect_permute_store_chain()).
5243
5244 In case of both multiple types and interleaving, above vector stores and
5245 permutation stmts are created for every copy. The result vector stmts are
5246 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5247 STMT_VINFO_RELATED_STMT for the next copies.
5248 */
5249
5250 prev_stmt_info = NULL;
5251 for (j = 0; j < ncopies; j++)
5252 {
5253 gimple new_stmt;
5254
5255 if (j == 0)
5256 {
5257 if (slp)
5258 {
5259 /* Get vectorized arguments for SLP_NODE. */
5260 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5261 NULL, slp_node, -1);
5262
5263 vec_oprnd = vec_oprnds[0];
5264 }
5265 else
5266 {
5267 /* For interleaved stores we collect vectorized defs for all the
5268 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5269 used as an input to vect_permute_store_chain(), and OPRNDS as
5270 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5271
5272 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5273 OPRNDS are of size 1. */
5274 next_stmt = first_stmt;
5275 for (i = 0; i < group_size; i++)
5276 {
5277 /* Since gaps are not supported for interleaved stores,
5278 GROUP_SIZE is the exact number of stmts in the chain.
5279 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5280 there is no interleaving, GROUP_SIZE is 1, and only one
5281 iteration of the loop will be executed. */
5282 gcc_assert (next_stmt
5283 && gimple_assign_single_p (next_stmt));
5284 op = gimple_assign_rhs1 (next_stmt);
5285
5286 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5287 NULL);
5288 dr_chain.quick_push (vec_oprnd);
5289 oprnds.quick_push (vec_oprnd);
5290 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5291 }
5292 }
5293
5294 /* We should have caught mismatched types earlier. */
5295 gcc_assert (useless_type_conversion_p (vectype,
5296 TREE_TYPE (vec_oprnd)));
5297 bool simd_lane_access_p
5298 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5299 if (simd_lane_access_p
5300 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5301 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5302 && integer_zerop (DR_OFFSET (first_dr))
5303 && integer_zerop (DR_INIT (first_dr))
5304 && alias_sets_conflict_p (get_alias_set (aggr_type),
5305 get_alias_set (DR_REF (first_dr))))
5306 {
5307 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5308 dataref_offset = build_int_cst (reference_alias_ptr_type
5309 (DR_REF (first_dr)), 0);
5310 inv_p = false;
5311 }
5312 else
5313 dataref_ptr
5314 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5315 simd_lane_access_p ? loop : NULL,
5316 offset, &dummy, gsi, &ptr_incr,
5317 simd_lane_access_p, &inv_p);
5318 gcc_assert (bb_vinfo || !inv_p);
5319 }
5320 else
5321 {
5322 /* For interleaved stores we created vectorized defs for all the
5323 defs stored in OPRNDS in the previous iteration (previous copy).
5324 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5325 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5326 next copy.
5327 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5328 OPRNDS are of size 1. */
5329 for (i = 0; i < group_size; i++)
5330 {
5331 op = oprnds[i];
5332 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5333 &def, &dt);
5334 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5335 dr_chain[i] = vec_oprnd;
5336 oprnds[i] = vec_oprnd;
5337 }
5338 if (dataref_offset)
5339 dataref_offset
5340 = int_const_binop (PLUS_EXPR, dataref_offset,
5341 TYPE_SIZE_UNIT (aggr_type));
5342 else
5343 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5344 TYPE_SIZE_UNIT (aggr_type));
5345 }
5346
5347 if (store_lanes_p)
5348 {
5349 tree vec_array;
5350
5351 /* Combine all the vectors into an array. */
5352 vec_array = create_vector_array (vectype, vec_num);
5353 for (i = 0; i < vec_num; i++)
5354 {
5355 vec_oprnd = dr_chain[i];
5356 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5357 }
5358
5359 /* Emit:
5360 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5361 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5362 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5363 gimple_call_set_lhs (new_stmt, data_ref);
5364 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5365 }
5366 else
5367 {
5368 new_stmt = NULL;
5369 if (grouped_store)
5370 {
5371 if (j == 0)
5372 result_chain.create (group_size);
5373 /* Permute. */
5374 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5375 &result_chain);
5376 }
5377
5378 next_stmt = first_stmt;
5379 for (i = 0; i < vec_num; i++)
5380 {
5381 unsigned align, misalign;
5382
5383 if (i > 0)
5384 /* Bump the vector pointer. */
5385 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5386 stmt, NULL_TREE);
5387
5388 if (slp)
5389 vec_oprnd = vec_oprnds[i];
5390 else if (grouped_store)
5391 /* For grouped stores vectorized defs are interleaved in
5392 vect_permute_store_chain(). */
5393 vec_oprnd = result_chain[i];
5394
5395 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5396 dataref_offset
5397 ? dataref_offset
5398 : build_int_cst (reference_alias_ptr_type
5399 (DR_REF (first_dr)), 0));
5400 align = TYPE_ALIGN_UNIT (vectype);
5401 if (aligned_access_p (first_dr))
5402 misalign = 0;
5403 else if (DR_MISALIGNMENT (first_dr) == -1)
5404 {
5405 TREE_TYPE (data_ref)
5406 = build_aligned_type (TREE_TYPE (data_ref),
5407 TYPE_ALIGN (elem_type));
5408 align = TYPE_ALIGN_UNIT (elem_type);
5409 misalign = 0;
5410 }
5411 else
5412 {
5413 TREE_TYPE (data_ref)
5414 = build_aligned_type (TREE_TYPE (data_ref),
5415 TYPE_ALIGN (elem_type));
5416 misalign = DR_MISALIGNMENT (first_dr);
5417 }
5418 if (dataref_offset == NULL_TREE)
5419 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5420 misalign);
5421
5422 if (negative
5423 && dt != vect_constant_def
5424 && dt != vect_external_def)
5425 {
5426 tree perm_mask = perm_mask_for_reverse (vectype);
5427 tree perm_dest
5428 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5429 vectype);
5430 tree new_temp = make_ssa_name (perm_dest, NULL);
5431
5432 /* Generate the permute statement. */
5433 gimple perm_stmt
5434 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5435 vec_oprnd, vec_oprnd,
5436 perm_mask);
5437 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5438
5439 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5440 vec_oprnd = new_temp;
5441 }
5442
5443 /* Arguments are ready. Create the new vector stmt. */
5444 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5445 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5446
5447 if (slp)
5448 continue;
5449
5450 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5451 if (!next_stmt)
5452 break;
5453 }
5454 }
5455 if (!slp)
5456 {
5457 if (j == 0)
5458 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5459 else
5460 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5461 prev_stmt_info = vinfo_for_stmt (new_stmt);
5462 }
5463 }
5464
5465 dr_chain.release ();
5466 oprnds.release ();
5467 result_chain.release ();
5468 vec_oprnds.release ();
5469
5470 return true;
5471 }
5472
5473 /* Given a vector type VECTYPE and a permutation SEL, return
5474    the VECTOR_CST mask that implements the permutation of the
5475    vector elements.  If that is impossible to do, return NULL.  */
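/* An illustrative sketch (editorial addition; it assumes a 4-element vector
   type such as V4SI): reversing a vector corresponds to SEL = {3, 2, 1, 0},
   for which vect_gen_perm_mask builds the integer VECTOR_CST {3, 2, 1, 0},
   so that

     v2 = VEC_PERM_EXPR <v, v, {3, 2, 1, 0}>

   yields v2 = {v[3], v[2], v[1], v[0]}.  When the target cannot perform the
   permutation (can_vec_perm_p), NULL is returned instead.  */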
5476
5477 tree
5478 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5479 {
5480 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5481 int i, nunits;
5482
5483 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5484
5485 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5486 return NULL;
5487
5488 mask_elt_type = lang_hooks.types.type_for_mode
5489 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5490 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5491
5492 mask_elts = XALLOCAVEC (tree, nunits);
5493 for (i = nunits - 1; i >= 0; i--)
5494 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5495 mask_vec = build_vector (mask_type, mask_elts);
5496
5497 return mask_vec;
5498 }
5499
5500 /* Given vector variables X and Y, that were generated for the scalar
5501 STMT, generate instructions to permute the vector elements of X and Y
5502 using permutation mask MASK_VEC, insert them at *GSI and return the
5503 permuted vector variable. */
5504
5505 static tree
5506 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5507 gimple_stmt_iterator *gsi)
5508 {
5509 tree vectype = TREE_TYPE (x);
5510 tree perm_dest, data_ref;
5511 gimple perm_stmt;
5512
5513 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5514 data_ref = make_ssa_name (perm_dest, NULL);
5515
5516 /* Generate the permute statement. */
5517 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5518 x, y, mask_vec);
5519 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5520
5521 return data_ref;
5522 }
5523
5524 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5525    inserting them on the loop's preheader edge.  Returns true if we
5526 were successful in doing so (and thus STMT can be moved then),
5527 otherwise returns false. */
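/* A minimal illustrative example (editorial addition; the variable names
   are hypothetical): when vectorizing

     for (i = 0; i < n; ++i)
       a[i] = *p;

   the load *p is loop-invariant, but the SSA names it uses (e.g. the
   pointer p) may still be defined by statements inside the loop body.
   hoist_defs_of_uses moves such single-level definitions to the loop
   preheader so that the invariant load itself can be emitted there.  */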
5528
5529 static bool
5530 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5531 {
5532 ssa_op_iter i;
5533 tree op;
5534 bool any = false;
5535
5536 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5537 {
5538 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5539 if (!gimple_nop_p (def_stmt)
5540 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5541 {
5542 	  /* Make sure we don't need to recurse.  While we could do
5543 	     so in simple cases, for more complex use webs we don't
5544 	     have an easy way to preserve stmt order to fulfil
5545 	     dependencies within them.  */
5546 tree op2;
5547 ssa_op_iter i2;
5548 if (gimple_code (def_stmt) == GIMPLE_PHI)
5549 return false;
5550 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5551 {
5552 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5553 if (!gimple_nop_p (def_stmt2)
5554 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5555 return false;
5556 }
5557 any = true;
5558 }
5559 }
5560
5561 if (!any)
5562 return true;
5563
5564 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5565 {
5566 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5567 if (!gimple_nop_p (def_stmt)
5568 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5569 {
5570 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5571 gsi_remove (&gsi, false);
5572 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5573 }
5574 }
5575
5576 return true;
5577 }
5578
5579 /* vectorizable_load.
5580
5581    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5582 can be vectorized.
5583 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5584    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5585 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5586
5587 static bool
5588 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5589 slp_tree slp_node, slp_instance slp_node_instance)
5590 {
5591 tree scalar_dest;
5592 tree vec_dest = NULL;
5593 tree data_ref = NULL;
5594 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5595 stmt_vec_info prev_stmt_info;
5596 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5597 struct loop *loop = NULL;
5598 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5599 bool nested_in_vect_loop = false;
5600 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5601 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5602 tree elem_type;
5603 tree new_temp;
5604 machine_mode mode;
5605 gimple new_stmt = NULL;
5606 tree dummy;
5607 enum dr_alignment_support alignment_support_scheme;
5608 tree dataref_ptr = NULL_TREE;
5609 tree dataref_offset = NULL_TREE;
5610 gimple ptr_incr = NULL;
5611 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5612 int ncopies;
5613 int i, j, group_size, group_gap;
5614 tree msq = NULL_TREE, lsq;
5615 tree offset = NULL_TREE;
5616 tree byte_offset = NULL_TREE;
5617 tree realignment_token = NULL_TREE;
5618 gimple phi = NULL;
5619 vec<tree> dr_chain = vNULL;
5620 bool grouped_load = false;
5621 bool load_lanes_p = false;
5622 gimple first_stmt;
5623 bool inv_p;
5624 bool negative = false;
5625 bool compute_in_loop = false;
5626 struct loop *at_loop;
5627 int vec_num;
5628 bool slp = (slp_node != NULL);
5629 bool slp_perm = false;
5630 enum tree_code code;
5631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5632 int vf;
5633 tree aggr_type;
5634 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5635 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5636 int gather_scale = 1;
5637 enum vect_def_type gather_dt = vect_unknown_def_type;
5638
5639 if (loop_vinfo)
5640 {
5641 loop = LOOP_VINFO_LOOP (loop_vinfo);
5642 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5643 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5644 }
5645 else
5646 vf = 1;
5647
5648 /* Multiple types in SLP are handled by creating the appropriate number of
5649 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5650 case of SLP. */
5651 if (slp || PURE_SLP_STMT (stmt_info))
5652 ncopies = 1;
5653 else
5654 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5655
5656 gcc_assert (ncopies >= 1);
5657
5658 /* FORNOW. This restriction should be relaxed. */
5659 if (nested_in_vect_loop && ncopies > 1)
5660 {
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5663 "multiple types in nested loop.\n");
5664 return false;
5665 }
5666
5667 /* Invalidate assumptions made by dependence analysis when vectorization
5668 on the unrolled body effectively re-orders stmts. */
5669 if (ncopies > 1
5670 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5671 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5672 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5673 {
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5676 "cannot perform implicit CSE when unrolling "
5677 "with negative dependence distance\n");
5678 return false;
5679 }
5680
5681 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5682 return false;
5683
5684 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5685 return false;
5686
5687 /* Is vectorizable load? */
5688 if (!is_gimple_assign (stmt))
5689 return false;
5690
5691 scalar_dest = gimple_assign_lhs (stmt);
5692 if (TREE_CODE (scalar_dest) != SSA_NAME)
5693 return false;
5694
5695 code = gimple_assign_rhs_code (stmt);
5696 if (code != ARRAY_REF
5697 && code != BIT_FIELD_REF
5698 && code != INDIRECT_REF
5699 && code != COMPONENT_REF
5700 && code != IMAGPART_EXPR
5701 && code != REALPART_EXPR
5702 && code != MEM_REF
5703 && TREE_CODE_CLASS (code) != tcc_declaration)
5704 return false;
5705
5706 if (!STMT_VINFO_DATA_REF (stmt_info))
5707 return false;
5708
5709 elem_type = TREE_TYPE (vectype);
5710 mode = TYPE_MODE (vectype);
5711
5712   /* FORNOW. In some cases we can vectorize even if the data-type is not
5713      supported (e.g. data copies).  */
5714 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5715 {
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718 "Aligned load, but unsupported type.\n");
5719 return false;
5720 }
5721
5722 /* Check if the load is a part of an interleaving chain. */
5723 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5724 {
5725 grouped_load = true;
5726 /* FORNOW */
5727 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5728
5729 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5730 if (!slp && !PURE_SLP_STMT (stmt_info))
5731 {
5732 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5733 if (vect_load_lanes_supported (vectype, group_size))
5734 load_lanes_p = true;
5735 else if (!vect_grouped_load_supported (vectype, group_size))
5736 return false;
5737 }
5738
5739 /* Invalidate assumptions made by dependence analysis when vectorization
5740 on the unrolled body effectively re-orders stmts. */
5741 if (!PURE_SLP_STMT (stmt_info)
5742 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5743 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5744 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5745 {
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5748 "cannot perform implicit CSE when performing "
5749 "group loads with negative dependence distance\n");
5750 return false;
5751 }
5752 }
5753
5754
5755 if (STMT_VINFO_GATHER_P (stmt_info))
5756 {
5757 gimple def_stmt;
5758 tree def;
5759 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5760 &gather_off, &gather_scale);
5761 gcc_assert (gather_decl);
5762 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5763 &def_stmt, &def, &gather_dt,
5764 &gather_off_vectype))
5765 {
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5768 "gather index use not simple.\n");
5769 return false;
5770 }
5771 }
5772 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5773 ;
5774 else
5775 {
5776 negative = tree_int_cst_compare (nested_in_vect_loop
5777 ? STMT_VINFO_DR_STEP (stmt_info)
5778 : DR_STEP (dr),
5779 size_zero_node) < 0;
5780 if (negative && ncopies > 1)
5781 {
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5784 "multiple types with negative step.\n");
5785 return false;
5786 }
5787
5788 if (negative)
5789 {
5790 if (grouped_load)
5791 {
5792 if (dump_enabled_p ())
5793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5794 "negative step for group load not supported"
5795 "\n");
5796 return false;
5797 }
5798 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5799 if (alignment_support_scheme != dr_aligned
5800 && alignment_support_scheme != dr_unaligned_supported)
5801 {
5802 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5804 "negative step but alignment required.\n");
5805 return false;
5806 }
5807 if (!perm_mask_for_reverse (vectype))
5808 {
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5811 "negative step and reversing not supported."
5812 "\n");
5813 return false;
5814 }
5815 }
5816 }
5817
5818 if (!vec_stmt) /* transformation not required. */
5819 {
5820 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5821 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5822 return true;
5823 }
5824
5825 if (dump_enabled_p ())
5826 dump_printf_loc (MSG_NOTE, vect_location,
5827 "transform load. ncopies = %d\n", ncopies);
5828
5829 /** Transform. **/
5830
5831 ensure_base_align (stmt_info, dr);
5832
5833 if (STMT_VINFO_GATHER_P (stmt_info))
5834 {
5835 tree vec_oprnd0 = NULL_TREE, op;
5836 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5837 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5838 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5839 edge pe = loop_preheader_edge (loop);
5840 gimple_seq seq;
5841 basic_block new_bb;
5842 enum { NARROW, NONE, WIDEN } modifier;
5843 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5844
5845 if (nunits == gather_off_nunits)
5846 modifier = NONE;
5847 else if (nunits == gather_off_nunits / 2)
5848 {
5849 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5850 modifier = WIDEN;
5851
5852 for (i = 0; i < gather_off_nunits; ++i)
5853 sel[i] = i | nunits;
5854
5855 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5856 gcc_assert (perm_mask != NULL_TREE);
5857 }
5858 else if (nunits == gather_off_nunits * 2)
5859 {
5860 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5861 modifier = NARROW;
5862
5863 for (i = 0; i < nunits; ++i)
5864 sel[i] = i < gather_off_nunits
5865 ? i : i + nunits - gather_off_nunits;
5866
5867 perm_mask = vect_gen_perm_mask (vectype, sel);
5868 gcc_assert (perm_mask != NULL_TREE);
5869 ncopies *= 2;
5870 }
5871 else
5872 gcc_unreachable ();
5873
5874 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5875 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5876 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5877 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5878 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5879 scaletype = TREE_VALUE (arglist);
5880 gcc_checking_assert (types_compatible_p (srctype, rettype));
5881
5882 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5883
5884 ptr = fold_convert (ptrtype, gather_base);
5885 if (!is_gimple_min_invariant (ptr))
5886 {
5887 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5888 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5889 gcc_assert (!new_bb);
5890 }
5891
5892 /* Currently we support only unconditional gather loads,
5893 so mask should be all ones. */
5894 if (TREE_CODE (masktype) == INTEGER_TYPE)
5895 mask = build_int_cst (masktype, -1);
5896 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5897 {
5898 mask = build_int_cst (TREE_TYPE (masktype), -1);
5899 mask = build_vector_from_val (masktype, mask);
5900 mask = vect_init_vector (stmt, mask, masktype, NULL);
5901 }
5902 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5903 {
5904 REAL_VALUE_TYPE r;
5905 long tmp[6];
5906 for (j = 0; j < 6; ++j)
5907 tmp[j] = -1;
5908 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5909 mask = build_real (TREE_TYPE (masktype), r);
5910 mask = build_vector_from_val (masktype, mask);
5911 mask = vect_init_vector (stmt, mask, masktype, NULL);
5912 }
5913 else
5914 gcc_unreachable ();
5915
5916 scale = build_int_cst (scaletype, gather_scale);
5917
5918 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5919 merge = build_int_cst (TREE_TYPE (rettype), 0);
5920 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5921 {
5922 REAL_VALUE_TYPE r;
5923 long tmp[6];
5924 for (j = 0; j < 6; ++j)
5925 tmp[j] = 0;
5926 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5927 merge = build_real (TREE_TYPE (rettype), r);
5928 }
5929 else
5930 gcc_unreachable ();
5931 merge = build_vector_from_val (rettype, merge);
5932 merge = vect_init_vector (stmt, merge, rettype, NULL);
5933
5934 prev_stmt_info = NULL;
5935 for (j = 0; j < ncopies; ++j)
5936 {
5937 if (modifier == WIDEN && (j & 1))
5938 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5939 perm_mask, stmt, gsi);
5940 else if (j == 0)
5941 op = vec_oprnd0
5942 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5943 else
5944 op = vec_oprnd0
5945 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5946
5947 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5948 {
5949 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5950 == TYPE_VECTOR_SUBPARTS (idxtype));
5951 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5952 var = make_ssa_name (var, NULL);
5953 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5954 new_stmt
5955 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5956 op, NULL_TREE);
5957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5958 op = var;
5959 }
5960
5961 new_stmt
5962 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5963
5964 if (!useless_type_conversion_p (vectype, rettype))
5965 {
5966 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5967 == TYPE_VECTOR_SUBPARTS (rettype));
5968 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5969 op = make_ssa_name (var, new_stmt);
5970 gimple_call_set_lhs (new_stmt, op);
5971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5972 var = make_ssa_name (vec_dest, NULL);
5973 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5974 new_stmt
5975 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5976 NULL_TREE);
5977 }
5978 else
5979 {
5980 var = make_ssa_name (vec_dest, new_stmt);
5981 gimple_call_set_lhs (new_stmt, var);
5982 }
5983
5984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5985
5986 if (modifier == NARROW)
5987 {
5988 if ((j & 1) == 0)
5989 {
5990 prev_res = var;
5991 continue;
5992 }
5993 var = permute_vec_elements (prev_res, var,
5994 perm_mask, stmt, gsi);
5995 new_stmt = SSA_NAME_DEF_STMT (var);
5996 }
5997
5998 if (prev_stmt_info == NULL)
5999 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6000 else
6001 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6002 prev_stmt_info = vinfo_for_stmt (new_stmt);
6003 }
6004 return true;
6005 }
6006 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6007 {
6008 gimple_stmt_iterator incr_gsi;
6009 bool insert_after;
6010 gimple incr;
6011 tree offvar;
6012 tree ivstep;
6013 tree running_off;
6014 vec<constructor_elt, va_gc> *v = NULL;
6015 gimple_seq stmts = NULL;
6016 tree stride_base, stride_step, alias_off;
6017
6018 gcc_assert (!nested_in_vect_loop);
6019
6020 stride_base
6021 = fold_build_pointer_plus
6022 (unshare_expr (DR_BASE_ADDRESS (dr)),
6023 size_binop (PLUS_EXPR,
6024 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6025 convert_to_ptrofftype (DR_INIT (dr))));
6026 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6027
6028 /* For a load with loop-invariant (but other than power-of-2)
6029 stride (i.e. not a grouped access) like so:
6030
6031 for (i = 0; i < n; i += stride)
6032 ... = array[i];
6033
6034 we generate a new induction variable and new accesses to
6035 form a new vector (or vectors, depending on ncopies):
6036
6037 for (j = 0; ; j += VF*stride)
6038 tmp1 = array[j];
6039 tmp2 = array[j + stride];
6040 ...
6041 vectemp = {tmp1, tmp2, ...}
6042 */
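      /* As a concrete (editorial) instance of the scheme above, assume a
	 4-element vector type with a single copy (so VF == 4) and a runtime
	 stride S; the generated code is then equivalent to

	   for (j = 0; ; j += 4*S)
	     tmp0 = array[j];
	     tmp1 = array[j + S];
	     tmp2 = array[j + 2*S];
	     tmp3 = array[j + 3*S];
	     vectemp = {tmp0, tmp1, tmp2, tmp3};

	 i.e. the induction variable advances by VF*stride elements per
	 vector iteration and each vector is assembled from VF scalar
	 loads.  */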
6043
6044 ivstep = stride_step;
6045 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6046 build_int_cst (TREE_TYPE (ivstep), vf));
6047
6048 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6049
6050 create_iv (stride_base, ivstep, NULL,
6051 loop, &incr_gsi, insert_after,
6052 &offvar, NULL);
6053 incr = gsi_stmt (incr_gsi);
6054 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6055
6056 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6057 if (stmts)
6058 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6059
6060 prev_stmt_info = NULL;
6061 running_off = offvar;
6062 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6063 for (j = 0; j < ncopies; j++)
6064 {
6065 tree vec_inv;
6066
6067 vec_alloc (v, nunits);
6068 for (i = 0; i < nunits; i++)
6069 {
6070 tree newref, newoff;
6071 gimple incr;
6072 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6073 running_off, alias_off);
6074
6075 newref = force_gimple_operand_gsi (gsi, newref, true,
6076 NULL_TREE, true,
6077 GSI_SAME_STMT);
6078 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6079 newoff = copy_ssa_name (running_off, NULL);
6080 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6081 running_off, stride_step);
6082 vect_finish_stmt_generation (stmt, incr, gsi);
6083
6084 running_off = newoff;
6085 }
6086
6087 vec_inv = build_constructor (vectype, v);
6088 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6089 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6090
6091 if (j == 0)
6092 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6093 else
6094 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6095 prev_stmt_info = vinfo_for_stmt (new_stmt);
6096 }
6097 return true;
6098 }
6099
6100 if (grouped_load)
6101 {
6102 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6103 if (slp
6104 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6105 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6106 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6107
6108 /* Check if the chain of loads is already vectorized. */
6109 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6110 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6111 ??? But we can only do so if there is exactly one
6112 as we have no way to get at the rest. Leave the CSE
6113 opportunity alone.
6114 ??? With the group load eventually participating
6115 in multiple different permutations (having multiple
6116 slp nodes which refer to the same group) the CSE
6117 is even wrong code. See PR56270. */
6118 && !slp)
6119 {
6120 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6121 return true;
6122 }
6123 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6124 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6125
6126 /* VEC_NUM is the number of vect stmts to be created for this group. */
6127 if (slp)
6128 {
6129 grouped_load = false;
6130 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6131 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6132 slp_perm = true;
6133 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6134 }
6135 else
6136 {
6137 vec_num = group_size;
6138 group_gap = 0;
6139 }
6140 }
6141 else
6142 {
6143 first_stmt = stmt;
6144 first_dr = dr;
6145 group_size = vec_num = 1;
6146 group_gap = 0;
6147 }
6148
6149 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6150 gcc_assert (alignment_support_scheme);
6151 /* Targets with load-lane instructions must not require explicit
6152 realignment. */
6153 gcc_assert (!load_lanes_p
6154 || alignment_support_scheme == dr_aligned
6155 || alignment_support_scheme == dr_unaligned_supported);
6156
6157 /* In case the vectorization factor (VF) is bigger than the number
6158 of elements that we can fit in a vectype (nunits), we have to generate
6159      more than one vector stmt - i.e. - we need to "unroll" the
6160 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6161 from one copy of the vector stmt to the next, in the field
6162 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6163 stages to find the correct vector defs to be used when vectorizing
6164 stmts that use the defs of the current stmt. The example below
6165 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6166 need to create 4 vectorized stmts):
6167
6168 before vectorization:
6169 RELATED_STMT VEC_STMT
6170 S1: x = memref - -
6171 S2: z = x + 1 - -
6172
6173 step 1: vectorize stmt S1:
6174 We first create the vector stmt VS1_0, and, as usual, record a
6175 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6176 Next, we create the vector stmt VS1_1, and record a pointer to
6177 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6178 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6179 stmts and pointers:
6180 RELATED_STMT VEC_STMT
6181 VS1_0: vx0 = memref0 VS1_1 -
6182 VS1_1: vx1 = memref1 VS1_2 -
6183 VS1_2: vx2 = memref2 VS1_3 -
6184 VS1_3: vx3 = memref3 - -
6185 S1: x = load - VS1_0
6186 S2: z = x + 1 - -
6187
6188      See the documentation of vect_get_vec_def_for_stmt_copy for how the
6189      information we recorded in the RELATED_STMT field is used to vectorize
6190 stmt S2. */
6191
6192 /* In case of interleaving (non-unit grouped access):
6193
6194 S1: x2 = &base + 2
6195 S2: x0 = &base
6196 S3: x1 = &base + 1
6197 S4: x3 = &base + 3
6198
6199 Vectorized loads are created in the order of memory accesses
6200 starting from the access of the first stmt of the chain:
6201
6202 VS1: vx0 = &base
6203 VS2: vx1 = &base + vec_size*1
6204      VS3: vx2 = &base + vec_size*2
6205      VS4: vx3 = &base + vec_size*3
6206
6207 Then permutation statements are generated:
6208
6209 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6210 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6211 ...
6212
6213 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6214 (the order of the data-refs in the output of vect_permute_load_chain
6215 corresponds to the order of scalar stmts in the interleaving chain - see
6216 the documentation of vect_permute_load_chain()).
6217 The generation of permutation stmts and recording them in
6218 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6219
6220 In case of both multiple types and interleaving, the vector loads and
6221 permutation stmts above are created for every copy. The result vector
6222 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6223 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6224
6225 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6226 on a target that supports unaligned accesses (dr_unaligned_supported)
6227 we generate the following code:
6228 p = initial_addr;
6229 indx = 0;
6230 loop {
6231 p = p + indx * vectype_size;
6232 vec_dest = *(p);
6233 indx = indx + 1;
6234 }
6235
6236 Otherwise, the data reference is potentially unaligned on a target that
6237 does not support unaligned accesses (dr_explicit_realign_optimized) -
6238 then generate the following code, in which the data in each iteration is
6239 obtained by two vector loads, one from the previous iteration, and one
6240 from the current iteration:
6241 p1 = initial_addr;
6242 msq_init = *(floor(p1))
6243 p2 = initial_addr + VS - 1;
6244 realignment_token = call target_builtin;
6245 indx = 0;
6246 loop {
6247 p2 = p2 + indx * vectype_size
6248 lsq = *(floor(p2))
6249 vec_dest = realign_load (msq, lsq, realignment_token)
6250 indx = indx + 1;
6251 msq = lsq;
6252 } */
6253
6254 /* If the misalignment remains the same throughout the execution of the
6255 loop, we can create the init_addr and permutation mask at the loop
6256 preheader. Otherwise, it needs to be created inside the loop.
6257 This can only occur when vectorizing memory accesses in the inner-loop
6258 nested within an outer-loop that is being vectorized. */
6259
6260 if (nested_in_vect_loop
6261 && (TREE_INT_CST_LOW (DR_STEP (dr))
6262 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6263 {
6264 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6265 compute_in_loop = true;
6266 }
6267
6268 if ((alignment_support_scheme == dr_explicit_realign_optimized
6269 || alignment_support_scheme == dr_explicit_realign)
6270 && !compute_in_loop)
6271 {
6272 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6273 alignment_support_scheme, NULL_TREE,
6274 &at_loop);
6275 if (alignment_support_scheme == dr_explicit_realign_optimized)
6276 {
6277 phi = SSA_NAME_DEF_STMT (msq);
6278 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6279 size_one_node);
6280 }
6281 }
6282 else
6283 at_loop = loop;
6284
6285 if (negative)
6286 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6287
6288 if (load_lanes_p)
6289 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6290 else
6291 aggr_type = vectype;
6292
6293 prev_stmt_info = NULL;
6294 for (j = 0; j < ncopies; j++)
6295 {
6296 /* 1. Create the vector or array pointer update chain. */
6297 if (j == 0)
6298 {
6299 bool simd_lane_access_p
6300 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6301 if (simd_lane_access_p
6302 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6303 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6304 && integer_zerop (DR_OFFSET (first_dr))
6305 && integer_zerop (DR_INIT (first_dr))
6306 && alias_sets_conflict_p (get_alias_set (aggr_type),
6307 get_alias_set (DR_REF (first_dr)))
6308 && (alignment_support_scheme == dr_aligned
6309 || alignment_support_scheme == dr_unaligned_supported))
6310 {
6311 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6312 dataref_offset = build_int_cst (reference_alias_ptr_type
6313 (DR_REF (first_dr)), 0);
6314 inv_p = false;
6315 }
6316 else
6317 dataref_ptr
6318 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6319 offset, &dummy, gsi, &ptr_incr,
6320 simd_lane_access_p, &inv_p,
6321 byte_offset);
6322 }
6323 else if (dataref_offset)
6324 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6325 TYPE_SIZE_UNIT (aggr_type));
6326 else
6327 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6328 TYPE_SIZE_UNIT (aggr_type));
6329
6330 if (grouped_load || slp_perm)
6331 dr_chain.create (vec_num);
6332
6333 if (load_lanes_p)
6334 {
6335 tree vec_array;
6336
6337 vec_array = create_vector_array (vectype, vec_num);
6338
6339 /* Emit:
6340 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6341 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6342 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6343 gimple_call_set_lhs (new_stmt, vec_array);
6344 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6345
6346 /* Extract each vector into an SSA_NAME. */
6347 for (i = 0; i < vec_num; i++)
6348 {
6349 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6350 vec_array, i);
6351 dr_chain.quick_push (new_temp);
6352 }
6353
6354 /* Record the mapping between SSA_NAMEs and statements. */
6355 vect_record_grouped_load_vectors (stmt, dr_chain);
6356 }
6357 else
6358 {
6359 for (i = 0; i < vec_num; i++)
6360 {
6361 if (i > 0)
6362 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6363 stmt, NULL_TREE);
6364
6365 /* 2. Create the vector-load in the loop. */
6366 switch (alignment_support_scheme)
6367 {
6368 case dr_aligned:
6369 case dr_unaligned_supported:
6370 {
6371 unsigned int align, misalign;
6372
6373 data_ref
6374 = build2 (MEM_REF, vectype, dataref_ptr,
6375 dataref_offset
6376 ? dataref_offset
6377 : build_int_cst (reference_alias_ptr_type
6378 (DR_REF (first_dr)), 0));
6379 align = TYPE_ALIGN_UNIT (vectype);
6380 if (alignment_support_scheme == dr_aligned)
6381 {
6382 gcc_assert (aligned_access_p (first_dr));
6383 misalign = 0;
6384 }
6385 else if (DR_MISALIGNMENT (first_dr) == -1)
6386 {
6387 TREE_TYPE (data_ref)
6388 = build_aligned_type (TREE_TYPE (data_ref),
6389 TYPE_ALIGN (elem_type));
6390 align = TYPE_ALIGN_UNIT (elem_type);
6391 misalign = 0;
6392 }
6393 else
6394 {
6395 TREE_TYPE (data_ref)
6396 = build_aligned_type (TREE_TYPE (data_ref),
6397 TYPE_ALIGN (elem_type));
6398 misalign = DR_MISALIGNMENT (first_dr);
6399 }
6400 if (dataref_offset == NULL_TREE)
6401 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6402 align, misalign);
6403 break;
6404 }
6405 case dr_explicit_realign:
6406 {
6407 tree ptr, bump;
6408 tree vs_minus_1;
6409
6410 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6411
6412 if (compute_in_loop)
6413 msq = vect_setup_realignment (first_stmt, gsi,
6414 &realignment_token,
6415 dr_explicit_realign,
6416 dataref_ptr, NULL);
6417
6418 ptr = copy_ssa_name (dataref_ptr, NULL);
6419 new_stmt = gimple_build_assign_with_ops
6420 (BIT_AND_EXPR, ptr, dataref_ptr,
6421 build_int_cst
6422 (TREE_TYPE (dataref_ptr),
6423 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6424 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6425 data_ref
6426 = build2 (MEM_REF, vectype, ptr,
6427 build_int_cst (reference_alias_ptr_type
6428 (DR_REF (first_dr)), 0));
6429 vec_dest = vect_create_destination_var (scalar_dest,
6430 vectype);
6431 new_stmt = gimple_build_assign (vec_dest, data_ref);
6432 new_temp = make_ssa_name (vec_dest, new_stmt);
6433 gimple_assign_set_lhs (new_stmt, new_temp);
6434 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6435 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6436 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6437 msq = new_temp;
6438
6439 bump = size_binop (MULT_EXPR, vs_minus_1,
6440 TYPE_SIZE_UNIT (elem_type));
6441 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6442 new_stmt = gimple_build_assign_with_ops
6443 (BIT_AND_EXPR, NULL_TREE, ptr,
6444 build_int_cst
6445 (TREE_TYPE (ptr),
6446 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6447 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6448 gimple_assign_set_lhs (new_stmt, ptr);
6449 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6450 data_ref
6451 = build2 (MEM_REF, vectype, ptr,
6452 build_int_cst (reference_alias_ptr_type
6453 (DR_REF (first_dr)), 0));
6454 break;
6455 }
6456 case dr_explicit_realign_optimized:
6457 new_temp = copy_ssa_name (dataref_ptr, NULL);
6458 new_stmt = gimple_build_assign_with_ops
6459 (BIT_AND_EXPR, new_temp, dataref_ptr,
6460 build_int_cst
6461 (TREE_TYPE (dataref_ptr),
6462 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6463 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6464 data_ref
6465 = build2 (MEM_REF, vectype, new_temp,
6466 build_int_cst (reference_alias_ptr_type
6467 (DR_REF (first_dr)), 0));
6468 break;
6469 default:
6470 gcc_unreachable ();
6471 }
6472 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6473 new_stmt = gimple_build_assign (vec_dest, data_ref);
6474 new_temp = make_ssa_name (vec_dest, new_stmt);
6475 gimple_assign_set_lhs (new_stmt, new_temp);
6476 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6477
6478 /* 3. Handle explicit realignment if necessary/supported.
6479 Create in loop:
6480 vec_dest = realign_load (msq, lsq, realignment_token) */
6481 if (alignment_support_scheme == dr_explicit_realign_optimized
6482 || alignment_support_scheme == dr_explicit_realign)
6483 {
6484 lsq = gimple_assign_lhs (new_stmt);
6485 if (!realignment_token)
6486 realignment_token = dataref_ptr;
6487 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6488 new_stmt
6489 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6490 vec_dest, msq, lsq,
6491 realignment_token);
6492 new_temp = make_ssa_name (vec_dest, new_stmt);
6493 gimple_assign_set_lhs (new_stmt, new_temp);
6494 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6495
6496 if (alignment_support_scheme == dr_explicit_realign_optimized)
6497 {
6498 gcc_assert (phi);
6499 if (i == vec_num - 1 && j == ncopies - 1)
6500 add_phi_arg (phi, lsq,
6501 loop_latch_edge (containing_loop),
6502 UNKNOWN_LOCATION);
6503 msq = lsq;
6504 }
6505 }
6506
6507 /* 4. Handle invariant-load. */
6508 if (inv_p && !bb_vinfo)
6509 {
6510 gcc_assert (!grouped_load);
6511 /* If we have versioned for aliasing or the loop doesn't
6512 have any data dependencies that would preclude this,
6513 then we are sure this is a loop invariant load and
6514 thus we can insert it on the preheader edge. */
6515 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6516 && !nested_in_vect_loop
6517 && hoist_defs_of_uses (stmt, loop))
6518 {
6519 if (dump_enabled_p ())
6520 {
6521 dump_printf_loc (MSG_NOTE, vect_location,
6522 "hoisting out of the vectorized "
6523 "loop: ");
6524 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6525 dump_printf (MSG_NOTE, "\n");
6526 }
6527 tree tem = copy_ssa_name (scalar_dest, NULL);
6528 gsi_insert_on_edge_immediate
6529 (loop_preheader_edge (loop),
6530 gimple_build_assign (tem,
6531 unshare_expr
6532 (gimple_assign_rhs1 (stmt))));
6533 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6534 }
6535 else
6536 {
6537 gimple_stmt_iterator gsi2 = *gsi;
6538 gsi_next (&gsi2);
6539 new_temp = vect_init_vector (stmt, scalar_dest,
6540 vectype, &gsi2);
6541 }
6542 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6543 set_vinfo_for_stmt (new_stmt,
6544 new_stmt_vec_info (new_stmt, loop_vinfo,
6545 bb_vinfo));
6546 }
6547
6548 if (negative)
6549 {
6550 tree perm_mask = perm_mask_for_reverse (vectype);
6551 new_temp = permute_vec_elements (new_temp, new_temp,
6552 perm_mask, stmt, gsi);
6553 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6554 }
6555
6556 /* Collect vector loads and later create their permutation in
6557 vect_transform_grouped_load (). */
6558 if (grouped_load || slp_perm)
6559 dr_chain.quick_push (new_temp);
6560
6561 /* Store vector loads in the corresponding SLP_NODE. */
6562 if (slp && !slp_perm)
6563 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6564 }
6565 /* Bump the vector pointer to account for a gap. */
6566 if (slp && group_gap != 0)
6567 {
6568 tree bump = size_binop (MULT_EXPR,
6569 TYPE_SIZE_UNIT (elem_type),
6570 size_int (group_gap));
6571 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6572 stmt, bump);
6573 }
6574 }
6575
6576 if (slp && !slp_perm)
6577 continue;
6578
6579 if (slp_perm)
6580 {
6581 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6582 slp_node_instance, false))
6583 {
6584 dr_chain.release ();
6585 return false;
6586 }
6587 }
6588 else
6589 {
6590 if (grouped_load)
6591 {
6592 if (!load_lanes_p)
6593 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6594 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6595 }
6596 else
6597 {
6598 if (j == 0)
6599 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6600 else
6601 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6602 prev_stmt_info = vinfo_for_stmt (new_stmt);
6603 }
6604 }
6605 dr_chain.release ();
6606 }
6607
6608 return true;
6609 }
6610
6611 /* Function vect_is_simple_cond.
6612
6613 Input:
6614 LOOP - the loop that is being vectorized.
6615 COND - Condition that is checked for simple use.
6616
6617 Output:
6618 *COMP_VECTYPE - the vector type for the comparison.
6619
6620 Returns whether a COND can be vectorized. Checks whether
6621    condition operands are supportable using vect_is_simple_use.  */
6622
6623 static bool
6624 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6625 bb_vec_info bb_vinfo, tree *comp_vectype)
6626 {
6627 tree lhs, rhs;
6628 tree def;
6629 enum vect_def_type dt;
6630 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6631
6632 if (!COMPARISON_CLASS_P (cond))
6633 return false;
6634
6635 lhs = TREE_OPERAND (cond, 0);
6636 rhs = TREE_OPERAND (cond, 1);
6637
6638 if (TREE_CODE (lhs) == SSA_NAME)
6639 {
6640 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6641 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6642 &lhs_def_stmt, &def, &dt, &vectype1))
6643 return false;
6644 }
6645 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6646 && TREE_CODE (lhs) != FIXED_CST)
6647 return false;
6648
6649 if (TREE_CODE (rhs) == SSA_NAME)
6650 {
6651 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6652 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6653 &rhs_def_stmt, &def, &dt, &vectype2))
6654 return false;
6655 }
6656 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6657 && TREE_CODE (rhs) != FIXED_CST)
6658 return false;
6659
6660 *comp_vectype = vectype1 ? vectype1 : vectype2;
6661 return true;
6662 }
6663
6664 /* vectorizable_condition.
6665
6666    Check if STMT is a conditional modify expression that can be vectorized.
6667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6668 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6669 at GSI.
6670
6671    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6672    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6673    the else clause if it is 2).
6674
6675 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
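/* An editorial illustration of the transformation performed below (the
   variable names are hypothetical): for a suitable vector type the scalar
   statement

     x = a < b ? c : d;

   becomes

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where the comparison is carried out in a signed integer vector type of
   the same element width as VECTYPE (vec_cmp_type below).  */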
6676
6677 bool
6678 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6679 gimple *vec_stmt, tree reduc_def, int reduc_index,
6680 slp_tree slp_node)
6681 {
6682 tree scalar_dest = NULL_TREE;
6683 tree vec_dest = NULL_TREE;
6684 tree cond_expr, then_clause, else_clause;
6685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6687 tree comp_vectype = NULL_TREE;
6688 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6689 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6690 tree vec_compare, vec_cond_expr;
6691 tree new_temp;
6692 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6693 tree def;
6694 enum vect_def_type dt, dts[4];
6695 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6696 int ncopies;
6697 enum tree_code code;
6698 stmt_vec_info prev_stmt_info = NULL;
6699 int i, j;
6700 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6701 vec<tree> vec_oprnds0 = vNULL;
6702 vec<tree> vec_oprnds1 = vNULL;
6703 vec<tree> vec_oprnds2 = vNULL;
6704 vec<tree> vec_oprnds3 = vNULL;
6705 tree vec_cmp_type;
6706
6707 if (slp_node || PURE_SLP_STMT (stmt_info))
6708 ncopies = 1;
6709 else
6710 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6711
6712 gcc_assert (ncopies >= 1);
6713 if (reduc_index && ncopies > 1)
6714 return false; /* FORNOW */
6715
6716 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6717 return false;
6718
6719 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6720 return false;
6721
6722 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6723 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6724 && reduc_def))
6725 return false;
6726
6727 /* FORNOW: not yet supported. */
6728 if (STMT_VINFO_LIVE_P (stmt_info))
6729 {
6730 if (dump_enabled_p ())
6731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6732 "value used after loop.\n");
6733 return false;
6734 }
6735
6736 /* Is vectorizable conditional operation? */
6737 if (!is_gimple_assign (stmt))
6738 return false;
6739
6740 code = gimple_assign_rhs_code (stmt);
6741
6742 if (code != COND_EXPR)
6743 return false;
6744
6745 cond_expr = gimple_assign_rhs1 (stmt);
6746 then_clause = gimple_assign_rhs2 (stmt);
6747 else_clause = gimple_assign_rhs3 (stmt);
6748
6749 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6750 &comp_vectype)
6751 || !comp_vectype)
6752 return false;
6753
6754 if (TREE_CODE (then_clause) == SSA_NAME)
6755 {
6756 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6757 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6758 &then_def_stmt, &def, &dt))
6759 return false;
6760 }
6761 else if (TREE_CODE (then_clause) != INTEGER_CST
6762 && TREE_CODE (then_clause) != REAL_CST
6763 && TREE_CODE (then_clause) != FIXED_CST)
6764 return false;
6765
6766 if (TREE_CODE (else_clause) == SSA_NAME)
6767 {
6768 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6769 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6770 &else_def_stmt, &def, &dt))
6771 return false;
6772 }
6773 else if (TREE_CODE (else_clause) != INTEGER_CST
6774 && TREE_CODE (else_clause) != REAL_CST
6775 && TREE_CODE (else_clause) != FIXED_CST)
6776 return false;
6777
6778 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6779   /* The result of a vector comparison should be of signed integer type.  */
6780 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6781 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6782 if (vec_cmp_type == NULL_TREE)
6783 return false;
6784
6785 if (!vec_stmt)
6786 {
6787 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6788 return expand_vec_cond_expr_p (vectype, comp_vectype);
6789 }
6790
6791 /* Transform. */
6792
6793 if (!slp_node)
6794 {
6795 vec_oprnds0.create (1);
6796 vec_oprnds1.create (1);
6797 vec_oprnds2.create (1);
6798 vec_oprnds3.create (1);
6799 }
6800
6801 /* Handle def. */
6802 scalar_dest = gimple_assign_lhs (stmt);
6803 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6804
6805 /* Handle cond expr. */
6806 for (j = 0; j < ncopies; j++)
6807 {
6808 gimple new_stmt = NULL;
6809 if (j == 0)
6810 {
6811 if (slp_node)
6812 {
6813 auto_vec<tree, 4> ops;
6814 auto_vec<vec<tree>, 4> vec_defs;
6815
6816 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6817 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6818 ops.safe_push (then_clause);
6819 ops.safe_push (else_clause);
6820 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6821 vec_oprnds3 = vec_defs.pop ();
6822 vec_oprnds2 = vec_defs.pop ();
6823 vec_oprnds1 = vec_defs.pop ();
6824 vec_oprnds0 = vec_defs.pop ();
6825
6826 ops.release ();
6827 vec_defs.release ();
6828 }
6829 else
6830 {
6831 gimple gtemp;
6832 vec_cond_lhs =
6833 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6834 stmt, NULL);
6835 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6836 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6837
6838 vec_cond_rhs =
6839 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6840 stmt, NULL);
6841 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6842 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6843 if (reduc_index == 1)
6844 vec_then_clause = reduc_def;
6845 else
6846 {
6847 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6848 stmt, NULL);
6849 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6850 NULL, &gtemp, &def, &dts[2]);
6851 }
6852 if (reduc_index == 2)
6853 vec_else_clause = reduc_def;
6854 else
6855 {
6856 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6857 stmt, NULL);
6858 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6859 NULL, &gtemp, &def, &dts[3]);
6860 }
6861 }
6862 }
6863 else
6864 {
6865 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6866 vec_oprnds0.pop ());
6867 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6868 vec_oprnds1.pop ());
6869 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6870 vec_oprnds2.pop ());
6871 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6872 vec_oprnds3.pop ());
6873 }
6874
6875 if (!slp_node)
6876 {
6877 vec_oprnds0.quick_push (vec_cond_lhs);
6878 vec_oprnds1.quick_push (vec_cond_rhs);
6879 vec_oprnds2.quick_push (vec_then_clause);
6880 vec_oprnds3.quick_push (vec_else_clause);
6881 }
6882
6883 /* Arguments are ready. Create the new vector stmt. */
6884 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6885 {
6886 vec_cond_rhs = vec_oprnds1[i];
6887 vec_then_clause = vec_oprnds2[i];
6888 vec_else_clause = vec_oprnds3[i];
6889
6890 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6891 vec_cond_lhs, vec_cond_rhs);
6892 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6893 vec_compare, vec_then_clause, vec_else_clause);
6894
6895 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6896 new_temp = make_ssa_name (vec_dest, new_stmt);
6897 gimple_assign_set_lhs (new_stmt, new_temp);
6898 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6899 if (slp_node)
6900 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6901 }
6902
6903 if (slp_node)
6904 continue;
6905
6906 if (j == 0)
6907 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6908 else
6909 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6910
6911 prev_stmt_info = vinfo_for_stmt (new_stmt);
6912 }
6913
6914 vec_oprnds0.release ();
6915 vec_oprnds1.release ();
6916 vec_oprnds2.release ();
6917 vec_oprnds3.release ();
6918
6919 return true;
6920 }
6921
6922
6923 /* Make sure the statement is vectorizable. */
6924
6925 bool
6926 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6927 {
6928 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6929 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6930 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6931 bool ok;
6932 tree scalar_type, vectype;
6933 gimple pattern_stmt;
6934 gimple_seq pattern_def_seq;
6935
6936 if (dump_enabled_p ())
6937 {
6938 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6939 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6940 dump_printf (MSG_NOTE, "\n");
6941 }
6942
6943 if (gimple_has_volatile_ops (stmt))
6944 {
6945 if (dump_enabled_p ())
6946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6947 "not vectorized: stmt has volatile operands\n");
6948
6949 return false;
6950 }
6951
6952 /* Skip stmts that do not need to be vectorized. In loops this is expected
6953 to include:
6954 - the COND_EXPR which is the loop exit condition
6955 - any LABEL_EXPRs in the loop
6956 - computations that are used only for array indexing or loop control.
6957 In basic blocks we only analyze statements that are a part of some SLP
6958      instance; therefore, all the statements are relevant.
6959
6960      The pattern statement needs to be analyzed instead of the original
6961      statement if the original statement is not relevant.  Otherwise, we
6962      analyze both statements.  In basic blocks we are called from some SLP
6963      instance traversal; in that case don't analyze pattern stmts instead,
6964      as the pattern stmts will already be part of the SLP instance.  */
6965
6966 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6967 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6968 && !STMT_VINFO_LIVE_P (stmt_info))
6969 {
6970 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6971 && pattern_stmt
6972 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6973 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6974 {
6975 /* Analyze PATTERN_STMT instead of the original stmt. */
6976 stmt = pattern_stmt;
6977 stmt_info = vinfo_for_stmt (pattern_stmt);
6978 if (dump_enabled_p ())
6979 {
6980 dump_printf_loc (MSG_NOTE, vect_location,
6981 "==> examining pattern statement: ");
6982 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6983 dump_printf (MSG_NOTE, "\n");
6984 }
6985 }
6986 else
6987 {
6988 if (dump_enabled_p ())
6989 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6990
6991 return true;
6992 }
6993 }
6994 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6995 && node == NULL
6996 && pattern_stmt
6997 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6998 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6999 {
7000 /* Analyze PATTERN_STMT too. */
7001 if (dump_enabled_p ())
7002 {
7003 dump_printf_loc (MSG_NOTE, vect_location,
7004 "==> examining pattern statement: ");
7005 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7006 dump_printf (MSG_NOTE, "\n");
7007 }
7008
7009 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7010 return false;
7011 }
7012
7013 if (is_pattern_stmt_p (stmt_info)
7014 && node == NULL
7015 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7016 {
7017 gimple_stmt_iterator si;
7018
7019 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7020 {
7021 gimple pattern_def_stmt = gsi_stmt (si);
7022 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7023 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7024 {
7025 /* Analyze def stmt of STMT if it's a pattern stmt. */
7026 if (dump_enabled_p ())
7027 {
7028 dump_printf_loc (MSG_NOTE, vect_location,
7029 "==> examining pattern def statement: ");
7030 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7031 dump_printf (MSG_NOTE, "\n");
7032 }
7033
7034 if (!vect_analyze_stmt (pattern_def_stmt,
7035 need_to_vectorize, node))
7036 return false;
7037 }
7038 }
7039 }
7040
7041 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7042 {
7043 case vect_internal_def:
7044 break;
7045
7046 case vect_reduction_def:
7047 case vect_nested_cycle:
7048 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7049 || relevance == vect_used_in_outer_by_reduction
7050 || relevance == vect_unused_in_scope));
7051 break;
7052
7053 case vect_induction_def:
7054 case vect_constant_def:
7055 case vect_external_def:
7056 case vect_unknown_def_type:
7057 default:
7058 gcc_unreachable ();
7059 }
7060
7061 if (bb_vinfo)
7062 {
7063 gcc_assert (PURE_SLP_STMT (stmt_info));
7064
7065 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7066 if (dump_enabled_p ())
7067 {
7068 dump_printf_loc (MSG_NOTE, vect_location,
7069 "get vectype for scalar type: ");
7070 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7071 dump_printf (MSG_NOTE, "\n");
7072 }
7073
7074 vectype = get_vectype_for_scalar_type (scalar_type);
7075 if (!vectype)
7076 {
7077 if (dump_enabled_p ())
7078 {
7079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7080 "not SLPed: unsupported data-type ");
7081 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7082 scalar_type);
7083 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7084 }
7085 return false;
7086 }
7087
7088 if (dump_enabled_p ())
7089 {
7090 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7091 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7092 dump_printf (MSG_NOTE, "\n");
7093 }
7094
7095 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7096 }
7097
7098 if (STMT_VINFO_RELEVANT_P (stmt_info))
7099 {
7100 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7101 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7102 || (is_gimple_call (stmt)
7103 && gimple_call_lhs (stmt) == NULL_TREE));
7104 *need_to_vectorize = true;
7105 }
7106
7107 ok = true;
7108 if (!bb_vinfo
7109 && (STMT_VINFO_RELEVANT_P (stmt_info)
7110 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7111 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7112 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7113 || vectorizable_shift (stmt, NULL, NULL, NULL)
7114 || vectorizable_operation (stmt, NULL, NULL, NULL)
7115 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7116 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7117 || vectorizable_call (stmt, NULL, NULL, NULL)
7118 || vectorizable_store (stmt, NULL, NULL, NULL)
7119 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7120 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7121 else
7122 {
7123 if (bb_vinfo)
7124 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7125 || vectorizable_conversion (stmt, NULL, NULL, node)
7126 || vectorizable_shift (stmt, NULL, NULL, node)
7127 || vectorizable_operation (stmt, NULL, NULL, node)
7128 || vectorizable_assignment (stmt, NULL, NULL, node)
7129 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7130 || vectorizable_call (stmt, NULL, NULL, node)
7131 || vectorizable_store (stmt, NULL, NULL, node)
7132 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7133 }
7134
7135 if (!ok)
7136 {
7137 if (dump_enabled_p ())
7138 {
7139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7140 "not vectorized: relevant stmt not ");
7141 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7142 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7143 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7144 }
7145
7146 return false;
7147 }
7148
7149 if (bb_vinfo)
7150 return true;
7151
7152   /* Stmts that are (also) "live" (i.e. - that are used outside the loop)
7153 need extra handling, except for vectorizable reductions. */
7154 if (STMT_VINFO_LIVE_P (stmt_info)
7155 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7156 ok = vectorizable_live_operation (stmt, NULL, NULL);
7157
7158 if (!ok)
7159 {
7160 if (dump_enabled_p ())
7161 {
7162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7163 "not vectorized: live stmt not ");
7164 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7165 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7166 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7167 }
7168
7169 return false;
7170 }
7171
7172 return true;
7173 }
7174
7175
7176 /* Function vect_transform_stmt.
7177
7178    Create a vectorized stmt to replace STMT, and insert it at GSI.  */
7179
7180 bool
7181 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7182 bool *grouped_store, slp_tree slp_node,
7183 slp_instance slp_node_instance)
7184 {
7185 bool is_store = false;
7186 gimple vec_stmt = NULL;
7187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7188 bool done;
7189
7190 switch (STMT_VINFO_TYPE (stmt_info))
7191 {
7192 case type_demotion_vec_info_type:
7193 case type_promotion_vec_info_type:
7194 case type_conversion_vec_info_type:
7195 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7196 gcc_assert (done);
7197 break;
7198
7199 case induc_vec_info_type:
7200 gcc_assert (!slp_node);
7201 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7202 gcc_assert (done);
7203 break;
7204
7205 case shift_vec_info_type:
7206 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7207 gcc_assert (done);
7208 break;
7209
7210 case op_vec_info_type:
7211 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7212 gcc_assert (done);
7213 break;
7214
7215 case assignment_vec_info_type:
7216 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7217 gcc_assert (done);
7218 break;
7219
7220 case load_vec_info_type:
7221 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7222 slp_node_instance);
7223 gcc_assert (done);
7224 break;
7225
7226 case store_vec_info_type:
7227 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7228 gcc_assert (done);
7229 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7230 {
7231 /* In case of interleaving, the whole chain is vectorized when the
7232 last store in the chain is reached. Store stmts before the last
7233 one are skipped, and their vec_stmt_info shouldn't be freed
7234 meanwhile. */
7235 *grouped_store = true;
7236 if (STMT_VINFO_VEC_STMT (stmt_info))
7237 is_store = true;
7238 }
7239 else
7240 is_store = true;
7241 break;
7242
7243 case condition_vec_info_type:
7244 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7245 gcc_assert (done);
7246 break;
7247
7248 case call_vec_info_type:
7249 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7250 stmt = gsi_stmt (*gsi);
7251 if (is_gimple_call (stmt)
7252 && gimple_call_internal_p (stmt)
7253 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7254 is_store = true;
7255 break;
7256
7257 case call_simd_clone_vec_info_type:
7258 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7259 stmt = gsi_stmt (*gsi);
7260 break;
7261
7262 case reduc_vec_info_type:
7263 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7264 gcc_assert (done);
7265 break;
7266
7267 default:
7268 if (!STMT_VINFO_LIVE_P (stmt_info))
7269 {
7270 if (dump_enabled_p ())
7271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7272 "stmt not supported.\n");
7273 gcc_unreachable ();
7274 }
7275 }
7276
7277 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7278 is being vectorized, but outside the immediately enclosing loop. */
7279 if (vec_stmt
7280 && STMT_VINFO_LOOP_VINFO (stmt_info)
7281 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7282 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7283 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7284 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7285 || STMT_VINFO_RELEVANT (stmt_info) ==
7286 vect_used_in_outer_by_reduction))
7287 {
7288 struct loop *innerloop = LOOP_VINFO_LOOP (
7289 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7290 imm_use_iterator imm_iter;
7291 use_operand_p use_p;
7292 tree scalar_dest;
7293 gimple exit_phi;
7294
7295 if (dump_enabled_p ())
7296 dump_printf_loc (MSG_NOTE, vect_location,
7297 "Record the vdef for outer-loop vectorization.\n");
7298
7299 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7300 (to be used when vectorizing outer-loop stmts that use the DEF of
7301 STMT). */
7302 if (gimple_code (stmt) == GIMPLE_PHI)
7303 scalar_dest = PHI_RESULT (stmt);
7304 else
7305 scalar_dest = gimple_assign_lhs (stmt);
7306
7307 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7308 {
7309 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7310 {
7311 exit_phi = USE_STMT (use_p);
7312 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7313 }
7314 }
7315 }
7316
7317 /* Handle stmts whose DEF is used outside the loop-nest that is
7318 being vectorized. */
7319 if (STMT_VINFO_LIVE_P (stmt_info)
7320 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7321 {
7322 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7323 gcc_assert (done);
7324 }
7325
7326 if (vec_stmt)
7327 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7328
7329 return is_store;
7330 }
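/* A simplified usage sketch (illustrative only, not the actual driver code;
   the real callers, e.g. vect_transform_loop and the SLP scheduling code,
   additionally handle pattern stmts, grouped stores and SLP instances).
   BB below is a made-up basic-block variable:

     gimple_stmt_iterator si;
     bool grouped_store = false;

     for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
       {
         gimple stmt = gsi_stmt (si);
         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
         if (!stmt_info || !STMT_VINFO_RELEVANT_P (stmt_info))
           continue;
         vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
       }  */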
7331
7332
7333 /* Remove a group of stores (for SLP or interleaving), free their
7334 stmt_vec_info. */
7335
7336 void
7337 vect_remove_stores (gimple first_stmt)
7338 {
7339 gimple next = first_stmt;
7340 gimple tmp;
7341 gimple_stmt_iterator next_si;
7342
7343 while (next)
7344 {
7345 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7346
7347 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7348 if (is_pattern_stmt_p (stmt_info))
7349 next = STMT_VINFO_RELATED_STMT (stmt_info);
7350 /* Free the attached stmt_vec_info and remove the stmt. */
7351 next_si = gsi_for_stmt (next);
7352 unlink_stmt_vdef (next);
7353 gsi_remove (&next_si, true);
7354 release_defs (next);
7355 free_stmt_vec_info (next);
7356 next = tmp;
7357 }
7358 }
7359
7360
7361 /* Function new_stmt_vec_info.
7362
7363 Create and initialize a new stmt_vec_info struct for STMT. */
7364
7365 stmt_vec_info
7366 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7367 bb_vec_info bb_vinfo)
7368 {
7369 stmt_vec_info res;
7370 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7371
7372 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7373 STMT_VINFO_STMT (res) = stmt;
7374 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7375 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7376 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7377 STMT_VINFO_LIVE_P (res) = false;
7378 STMT_VINFO_VECTYPE (res) = NULL;
7379 STMT_VINFO_VEC_STMT (res) = NULL;
7380 STMT_VINFO_VECTORIZABLE (res) = true;
7381 STMT_VINFO_IN_PATTERN_P (res) = false;
7382 STMT_VINFO_RELATED_STMT (res) = NULL;
7383 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7384 STMT_VINFO_DATA_REF (res) = NULL;
7385
7386 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7387 STMT_VINFO_DR_OFFSET (res) = NULL;
7388 STMT_VINFO_DR_INIT (res) = NULL;
7389 STMT_VINFO_DR_STEP (res) = NULL;
7390 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7391
7392 if (gimple_code (stmt) == GIMPLE_PHI
7393 && is_loop_header_bb_p (gimple_bb (stmt)))
7394 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7395 else
7396 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7397
7398 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7399 STMT_SLP_TYPE (res) = loop_vect;
7400 GROUP_FIRST_ELEMENT (res) = NULL;
7401 GROUP_NEXT_ELEMENT (res) = NULL;
7402 GROUP_SIZE (res) = 0;
7403 GROUP_STORE_COUNT (res) = 0;
7404 GROUP_GAP (res) = 0;
7405 GROUP_SAME_DR_STMT (res) = NULL;
7406
7407 return res;
7408 }
7409
7410
7411 /* Create the vector that maps gimple stmt UIDs to stmt_vec_infos. */
7412
7413 void
7414 init_stmt_vec_info_vec (void)
7415 {
7416 gcc_assert (!stmt_vec_info_vec.exists ());
7417 stmt_vec_info_vec.create (50);
7418 }
7419
7420
7421 /* Free the stmt_vec_infos recorded in the vector and release it. */
7422
7423 void
7424 free_stmt_vec_info_vec (void)
7425 {
7426 unsigned int i;
7427 vec_void_p info;
7428 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7429 if (info != NULL)
7430 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7431 gcc_assert (stmt_vec_info_vec.exists ());
7432 stmt_vec_info_vec.release ();
7433 }
7434
7435
7436 /* Free stmt vectorization related info. */
7437
7438 void
7439 free_stmt_vec_info (gimple stmt)
7440 {
7441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7442
7443 if (!stmt_info)
7444 return;
7445
7446 /* Check if this statement has a related "pattern stmt"
7447 (introduced by the vectorizer during the pattern recognition
7448 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7449 too. */
7450 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7451 {
7452 stmt_vec_info patt_info
7453 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7454 if (patt_info)
7455 {
7456 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7457 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7458 gimple_set_bb (patt_stmt, NULL);
7459 tree lhs = gimple_get_lhs (patt_stmt);
7460 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7461 release_ssa_name (lhs);
7462 if (seq)
7463 {
7464 gimple_stmt_iterator si;
7465 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7466 {
7467 gimple seq_stmt = gsi_stmt (si);
7468 gimple_set_bb (seq_stmt, NULL);
7469 lhs = gimple_get_lhs (seq_stmt);
7470 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7471 release_ssa_name (lhs);
7472 free_stmt_vec_info (seq_stmt);
7473 }
7474 }
7475 free_stmt_vec_info (patt_stmt);
7476 }
7477 }
7478
7479 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7480 set_vinfo_for_stmt (stmt, NULL);
7481 free (stmt_info);
7482 }
7483
7484
7485 /* Function get_vectype_for_scalar_type_and_size.
7486
7487 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7488 by the target. */
7489
7490 static tree
7491 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7492 {
7493 machine_mode inner_mode = TYPE_MODE (scalar_type);
7494 machine_mode simd_mode;
7495 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7496 int nunits;
7497 tree vectype;
7498
7499 if (nbytes == 0)
7500 return NULL_TREE;
7501
7502 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7503 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7504 return NULL_TREE;
7505
7506 /* For vector types of elements whose mode precision doesn't
7507 match their type's precision, we use an element type of mode
7508 precision. The vectorization routines will have to make sure
7509 they support the proper result truncation/extension.
7510 We also make sure to build vector types with INTEGER_TYPE
7511 component type only. */
7512 if (INTEGRAL_TYPE_P (scalar_type)
7513 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7514 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7515 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7516 TYPE_UNSIGNED (scalar_type));
7517
7518 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7519 When the component mode passes the above test simply use a type
7520 corresponding to that mode. The theory is that any use that
7521 would cause problems with this will disable vectorization anyway. */
7522 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7523 && !INTEGRAL_TYPE_P (scalar_type))
7524 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7525
7526 /* We can't build a vector type of elements with alignment bigger than
7527 their size. */
7528 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7529 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7530 TYPE_UNSIGNED (scalar_type));
7531
7532 /* If we fell back to using the mode, fail if there was
7533 no scalar type for it. */
7534 if (scalar_type == NULL_TREE)
7535 return NULL_TREE;
7536
7537 /* If no size was supplied use the mode the target prefers. Otherwise
7538 lookup a vector mode of the specified size. */
7539 if (size == 0)
7540 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7541 else
7542 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7543 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7544 if (nunits <= 1)
7545 return NULL_TREE;
7546
7547 vectype = build_vector_type (scalar_type, nunits);
7548
7549 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7550 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7551 return NULL_TREE;
7552
7553 return vectype;
7554 }
7555
7556 unsigned int current_vector_size;
7557
7558 /* Function get_vectype_for_scalar_type.
7559
7560 Returns the vector type corresponding to SCALAR_TYPE as supported
7561 by the target. */
7562
7563 tree
7564 get_vectype_for_scalar_type (tree scalar_type)
7565 {
7566 tree vectype;
7567 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7568 current_vector_size);
7569 if (vectype
7570 && current_vector_size == 0)
7571 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7572 return vectype;
7573 }
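/* Illustrative behaviour (target-dependent, shown only as an example): on a
   target whose preferred SIMD mode for SImode is V4SImode and with
   current_vector_size still 0,

     tree vt0 = get_vectype_for_scalar_type (integer_type_node);

   returns a "vector(4) int" type and latches current_vector_size to 16 bytes;
   a subsequent query for another scalar type, e.g.

     tree vt1 = get_vectype_for_scalar_type (short_integer_type_node);

   then returns a 16-byte "vector(8) short int", so all vector types chosen
   for one vectorization region share the same size.  */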
7574
7575 /* Function get_same_sized_vectype
7576
7577 Returns a vector type corresponding to SCALAR_TYPE with the same
7578 size as VECTOR_TYPE, if supported by the target. */
7579
7580 tree
7581 get_same_sized_vectype (tree scalar_type, tree vector_type)
7582 {
7583 return get_vectype_for_scalar_type_and_size
7584 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7585 }
7586
7587 /* Function vect_is_simple_use.
7588
7589 Input:
7590 LOOP_VINFO - the vect info of the loop that is being vectorized.
7591 BB_VINFO - the vect info of the basic block that is being vectorized.
7592 OPERAND - operand of STMT in the loop or bb.
7593 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
7594
7595 Returns whether a stmt with OPERAND can be vectorized.
7596 For loops, supportable operands are constants, loop invariants, and operands
7597 that are defined by the current iteration of the loop. Unsupportable
7598 operands are those that are defined by a previous iteration of the loop (as
7599 is the case in reduction/induction computations).
7600 For basic blocks, supportable operands are constants and bb invariants.
7601 For now, operands defined outside the basic block are not supported. */
7602
7603 bool
7604 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7605 bb_vec_info bb_vinfo, gimple *def_stmt,
7606 tree *def, enum vect_def_type *dt)
7607 {
7608 basic_block bb;
7609 stmt_vec_info stmt_vinfo;
7610 struct loop *loop = NULL;
7611
7612 if (loop_vinfo)
7613 loop = LOOP_VINFO_LOOP (loop_vinfo);
7614
7615 *def_stmt = NULL;
7616 *def = NULL_TREE;
7617
7618 if (dump_enabled_p ())
7619 {
7620 dump_printf_loc (MSG_NOTE, vect_location,
7621 "vect_is_simple_use: operand ");
7622 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7623 dump_printf (MSG_NOTE, "\n");
7624 }
7625
7626 if (CONSTANT_CLASS_P (operand))
7627 {
7628 *dt = vect_constant_def;
7629 return true;
7630 }
7631
7632 if (is_gimple_min_invariant (operand))
7633 {
7634 *def = operand;
7635 *dt = vect_external_def;
7636 return true;
7637 }
7638
7639 if (TREE_CODE (operand) == PAREN_EXPR)
7640 {
7641 if (dump_enabled_p ())
7642 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7643 operand = TREE_OPERAND (operand, 0);
7644 }
7645
7646 if (TREE_CODE (operand) != SSA_NAME)
7647 {
7648 if (dump_enabled_p ())
7649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7650 "not ssa-name.\n");
7651 return false;
7652 }
7653
7654 *def_stmt = SSA_NAME_DEF_STMT (operand);
7655 if (*def_stmt == NULL)
7656 {
7657 if (dump_enabled_p ())
7658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7659 "no def_stmt.\n");
7660 return false;
7661 }
7662
7663 if (dump_enabled_p ())
7664 {
7665 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7667 dump_printf (MSG_NOTE, "\n");
7668 }
7669
7670 /* An empty stmt is expected only in case of a function argument
7671 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7672 if (gimple_nop_p (*def_stmt))
7673 {
7674 *def = operand;
7675 *dt = vect_external_def;
7676 return true;
7677 }
7678
7679 bb = gimple_bb (*def_stmt);
7680
7681 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7682 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7683 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7684 *dt = vect_external_def;
7685 else
7686 {
7687 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7688 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7689 }
7690
7691 if (*dt == vect_unknown_def_type
7692 || (stmt
7693 && *dt == vect_double_reduction_def
7694 && gimple_code (stmt) != GIMPLE_PHI))
7695 {
7696 if (dump_enabled_p ())
7697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7698 "Unsupported pattern.\n");
7699 return false;
7700 }
7701
7702 if (dump_enabled_p ())
7703 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7704
7705 switch (gimple_code (*def_stmt))
7706 {
7707 case GIMPLE_PHI:
7708 *def = gimple_phi_result (*def_stmt);
7709 break;
7710
7711 case GIMPLE_ASSIGN:
7712 *def = gimple_assign_lhs (*def_stmt);
7713 break;
7714
7715 case GIMPLE_CALL:
7716 *def = gimple_call_lhs (*def_stmt);
7717 if (*def != NULL)
7718 break;
7719 /* FALLTHRU */
7720 default:
7721 if (dump_enabled_p ())
7722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7723 "unsupported defining stmt:\n");
7724 return false;
7725 }
7726
7727 return true;
7728 }
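/* A minimal usage sketch (illustrative; this is the pattern the
   vectorizable_* analysis routines in this file follow for each operand):

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (gimple_assign_rhs1 (stmt), stmt, loop_vinfo,
                              bb_vinfo, &def_stmt, &def, &dt))
       return false;

   On success DT classifies the operand, e.g. vect_constant_def or
   vect_external_def for invariants that will be broadcast, versus
   vect_internal_def for operands computed by a vectorizable stmt.  */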
7729
7730 /* Function vect_is_simple_use_1.
7731
7732 Same as vect_is_simple_use but also determines the vector operand
7733 type of OPERAND and stores it to *VECTYPE. If the definition of
7734 OPERAND is vect_uninitialized_def, vect_constant_def or
7735 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7736 is responsible for computing the best suited vector type for the
7737 scalar operand. */
7738
7739 bool
7740 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7741 bb_vec_info bb_vinfo, gimple *def_stmt,
7742 tree *def, enum vect_def_type *dt, tree *vectype)
7743 {
7744 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7745 def, dt))
7746 return false;
7747
7748 /* Now get a vector type if the def is internal, otherwise supply
7749 NULL_TREE and leave it up to the caller to figure out a proper
7750 type for the use stmt. */
7751 if (*dt == vect_internal_def
7752 || *dt == vect_induction_def
7753 || *dt == vect_reduction_def
7754 || *dt == vect_double_reduction_def
7755 || *dt == vect_nested_cycle)
7756 {
7757 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7758
7759 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7760 && !STMT_VINFO_RELEVANT (stmt_info)
7761 && !STMT_VINFO_LIVE_P (stmt_info))
7762 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7763
7764 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7765 gcc_assert (*vectype != NULL_TREE);
7766 }
7767 else if (*dt == vect_uninitialized_def
7768 || *dt == vect_constant_def
7769 || *dt == vect_external_def)
7770 *vectype = NULL_TREE;
7771 else
7772 gcc_unreachable ();
7773
7774 return true;
7775 }
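/* Illustrative follow-up for the constant/external case (a sketch of what
   callers such as vectorizable_operation do when *VECTYPE comes back as
   NULL_TREE; OP names the scalar operand and is a placeholder):

     tree op_vectype;
     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt, &op_vectype))
       return false;
     if (!op_vectype)
       op_vectype = get_vectype_for_scalar_type (TREE_TYPE (op));  */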
7776
7777
7778 /* Function supportable_widening_operation
7779
7780 Check whether an operation represented by the code CODE is a
7781 widening operation that is supported by the target platform in
7782 vector form (i.e., when operating on arguments of type VECTYPE_IN
7783 producing a result of type VECTYPE_OUT).
7784
7785 Widening operations we currently support are NOP (CONVERT), FLOAT,
7786 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
7787 are supported by the target platform either directly (via vector
7788 tree-codes), or via target builtins.
7789
7790 Output:
7791 - CODE1 and CODE2 are codes of vector operations to be used when
7792 vectorizing the operation, if available.
7793 - MULTI_STEP_CVT determines the number of required intermediate steps in
7794 case of multi-step conversion (like char->short->int - in that case
7795 MULTI_STEP_CVT will be 1).
7796 - INTERM_TYPES contains the intermediate type required to perform the
7797 widening operation (short in the above example). */
7798
7799 bool
7800 supportable_widening_operation (enum tree_code code, gimple stmt,
7801 tree vectype_out, tree vectype_in,
7802 enum tree_code *code1, enum tree_code *code2,
7803 int *multi_step_cvt,
7804 vec<tree> *interm_types)
7805 {
7806 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7807 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7808 struct loop *vect_loop = NULL;
7809 machine_mode vec_mode;
7810 enum insn_code icode1, icode2;
7811 optab optab1, optab2;
7812 tree vectype = vectype_in;
7813 tree wide_vectype = vectype_out;
7814 enum tree_code c1, c2;
7815 int i;
7816 tree prev_type, intermediate_type;
7817 machine_mode intermediate_mode, prev_mode;
7818 optab optab3, optab4;
7819
7820 *multi_step_cvt = 0;
7821 if (loop_info)
7822 vect_loop = LOOP_VINFO_LOOP (loop_info);
7823
7824 switch (code)
7825 {
7826 case WIDEN_MULT_EXPR:
7827 /* The result of a vectorized widening operation usually requires
7828 two vectors (because the widened results do not fit into one vector).
7829 The generated vector results would normally be expected to be
7830 generated in the same order as in the original scalar computation,
7831 i.e. if 8 results are generated in each vector iteration, they are
7832 to be organized as follows:
7833 vect1: [res1,res2,res3,res4],
7834 vect2: [res5,res6,res7,res8].
7835
7836 However, in the special case that the result of the widening
7837 operation is used in a reduction computation only, the order doesn't
7838 matter (because when vectorizing a reduction we change the order of
7839 the computation). Some targets can take advantage of this and
7840 generate more efficient code. For example, targets like Altivec,
7841 that support widen_mult using a sequence of {mult_even,mult_odd}
7842 generate the following vectors:
7843 vect1: [res1,res3,res5,res7],
7844 vect2: [res2,res4,res6,res8].
7845
7846 When vectorizing outer-loops, we execute the inner-loop sequentially
7847 (each vectorized inner-loop iteration contributes to VF outer-loop
7848 iterations in parallel). We therefore don't allow changing the
7849 order of the computation in the inner-loop during outer-loop
7850 vectorization. */
7851 /* TODO: Another case in which order doesn't *really* matter is when we
7852 widen and then contract again, e.g. (short)((int)x * y >> 8).
7853 Normally, pack_trunc performs an even/odd permute, whereas the
7854 repack from an even/odd expansion would be an interleave, which
7855 would be significantly simpler for e.g. AVX2. */
7856 /* In any case, in order to avoid duplicating the code below, recurse
7857 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7858 are properly set up for the caller. If we fail, we'll continue with
7859 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7860 if (vect_loop
7861 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7862 && !nested_in_vect_loop_p (vect_loop, stmt)
7863 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7864 stmt, vectype_out, vectype_in,
7865 code1, code2, multi_step_cvt,
7866 interm_types))
7867 {
7868 /* Elements in a vector with vect_used_by_reduction property cannot
7869 be reordered if the use chain with this property does not have the
7870 same operation. One such example is s += a * b, where elements
7871 in a and b cannot be reordered. Here we check if the vector defined
7872 by STMT is only directly used in the reduction statement. */
7873 tree lhs = gimple_assign_lhs (stmt);
7874 use_operand_p dummy;
7875 gimple use_stmt;
7876 stmt_vec_info use_stmt_info = NULL;
7877 if (single_imm_use (lhs, &dummy, &use_stmt)
7878 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7879 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7880 return true;
7881 }
7882 c1 = VEC_WIDEN_MULT_LO_EXPR;
7883 c2 = VEC_WIDEN_MULT_HI_EXPR;
7884 break;
7885
7886 case VEC_WIDEN_MULT_EVEN_EXPR:
7887 /* Support the recursion induced just above. */
7888 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7889 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7890 break;
7891
7892 case WIDEN_LSHIFT_EXPR:
7893 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7894 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7895 break;
7896
7897 CASE_CONVERT:
7898 c1 = VEC_UNPACK_LO_EXPR;
7899 c2 = VEC_UNPACK_HI_EXPR;
7900 break;
7901
7902 case FLOAT_EXPR:
7903 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7904 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7905 break;
7906
7907 case FIX_TRUNC_EXPR:
7908 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7909 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7910 computing the operation. */
7911 return false;
7912
7913 default:
7914 gcc_unreachable ();
7915 }
7916
7917 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7918 {
7919 enum tree_code ctmp = c1;
7920 c1 = c2;
7921 c2 = ctmp;
7922 }
7923
7924 if (code == FIX_TRUNC_EXPR)
7925 {
7926 /* The signedness is determined from the output operand. */
7927 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7928 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7929 }
7930 else
7931 {
7932 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7933 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7934 }
7935
7936 if (!optab1 || !optab2)
7937 return false;
7938
7939 vec_mode = TYPE_MODE (vectype);
7940 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7941 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7942 return false;
7943
7944 *code1 = c1;
7945 *code2 = c2;
7946
7947 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7948 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7949 return true;
7950
7951 /* Check if it's a multi-step conversion that can be done using intermediate
7952 types. */
7953
7954 prev_type = vectype;
7955 prev_mode = vec_mode;
7956
7957 if (!CONVERT_EXPR_CODE_P (code))
7958 return false;
7959
7960 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7961 intermediate steps in the promotion sequence. We try
7962 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7963 not. */
7964 interm_types->create (MAX_INTERM_CVT_STEPS);
7965 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7966 {
7967 intermediate_mode = insn_data[icode1].operand[0].mode;
7968 intermediate_type
7969 = lang_hooks.types.type_for_mode (intermediate_mode,
7970 TYPE_UNSIGNED (prev_type));
7971 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7972 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7973
7974 if (!optab3 || !optab4
7975 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7976 || insn_data[icode1].operand[0].mode != intermediate_mode
7977 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7978 || insn_data[icode2].operand[0].mode != intermediate_mode
7979 || ((icode1 = optab_handler (optab3, intermediate_mode))
7980 == CODE_FOR_nothing)
7981 || ((icode2 = optab_handler (optab4, intermediate_mode))
7982 == CODE_FOR_nothing))
7983 break;
7984
7985 interm_types->quick_push (intermediate_type);
7986 (*multi_step_cvt)++;
7987
7988 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7989 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7990 return true;
7991
7992 prev_type = intermediate_type;
7993 prev_mode = intermediate_mode;
7994 }
7995
7996 interm_types->release ();
7997 return false;
7998 }
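/* Worked example (illustrative; the chosen codes and the number of steps are
   target-dependent): a single-step widening such as short -> int is typically
   reported as *CODE1 = VEC_UNPACK_LO_EXPR, *CODE2 = VEC_UNPACK_HI_EXPR with
   *MULTI_STEP_CVT == 0, while char -> int needs an intermediate short step,
   i.e. *MULTI_STEP_CVT == 1 and *INTERM_TYPES holding the short vector type.
   A typical query from an analysis routine looks like:

     enum tree_code c1, c2;
     int steps;
     vec<tree> itypes = vNULL;
     if (supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                         vectype_in, &c1, &c2, &steps,
                                         &itypes))
       ... generate the C1/C2 stmts, one pair per conversion step ...  */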
7999
8000
8001 /* Function supportable_narrowing_operation
8002
8003 Check whether an operation represented by the code CODE is a
8004 narrowing operation that is supported by the target platform in
8005 vector form (i.e., when operating on arguments of type VECTYPE_IN
8006 and producing a result of type VECTYPE_OUT).
8007
8008 Narrowing operations we currently support are NOP (CONVERT) and
8009 FIX_TRUNC. This function checks if these operations are supported by
8010 the target platform directly via vector tree-codes.
8011
8012 Output:
8013 - CODE1 is the code of a vector operation to be used when
8014 vectorizing the operation, if available.
8015 - MULTI_STEP_CVT determines the number of required intermediate steps in
8016 case of multi-step conversion (like int->short->char - in that case
8017 MULTI_STEP_CVT will be 1).
8018 - INTERM_TYPES contains the intermediate type required to perform the
8019 narrowing operation (short in the above example). */
8020
8021 bool
8022 supportable_narrowing_operation (enum tree_code code,
8023 tree vectype_out, tree vectype_in,
8024 enum tree_code *code1, int *multi_step_cvt,
8025 vec<tree> *interm_types)
8026 {
8027 machine_mode vec_mode;
8028 enum insn_code icode1;
8029 optab optab1, interm_optab;
8030 tree vectype = vectype_in;
8031 tree narrow_vectype = vectype_out;
8032 enum tree_code c1;
8033 tree intermediate_type;
8034 machine_mode intermediate_mode, prev_mode;
8035 int i;
8036 bool uns;
8037
8038 *multi_step_cvt = 0;
8039 switch (code)
8040 {
8041 CASE_CONVERT:
8042 c1 = VEC_PACK_TRUNC_EXPR;
8043 break;
8044
8045 case FIX_TRUNC_EXPR:
8046 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8047 break;
8048
8049 case FLOAT_EXPR:
8050 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8051 tree code and optabs used for computing the operation. */
8052 return false;
8053
8054 default:
8055 gcc_unreachable ();
8056 }
8057
8058 if (code == FIX_TRUNC_EXPR)
8059 /* The signedness is determined from the output operand. */
8060 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8061 else
8062 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8063
8064 if (!optab1)
8065 return false;
8066
8067 vec_mode = TYPE_MODE (vectype);
8068 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8069 return false;
8070
8071 *code1 = c1;
8072
8073 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8074 return true;
8075
8076 /* Check if it's a multi-step conversion that can be done using intermediate
8077 types. */
8078 prev_mode = vec_mode;
8079 if (code == FIX_TRUNC_EXPR)
8080 uns = TYPE_UNSIGNED (vectype_out);
8081 else
8082 uns = TYPE_UNSIGNED (vectype);
8083
8084 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8085 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8086 costly than signed. */
8087 if (code == FIX_TRUNC_EXPR && uns)
8088 {
8089 enum insn_code icode2;
8090
8091 intermediate_type
8092 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8093 interm_optab
8094 = optab_for_tree_code (c1, intermediate_type, optab_default);
8095 if (interm_optab != unknown_optab
8096 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8097 && insn_data[icode1].operand[0].mode
8098 == insn_data[icode2].operand[0].mode)
8099 {
8100 uns = false;
8101 optab1 = interm_optab;
8102 icode1 = icode2;
8103 }
8104 }
8105
8106 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8107 intermediate steps in the narrowing sequence. We try
8108 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8109 interm_types->create (MAX_INTERM_CVT_STEPS);
8110 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8111 {
8112 intermediate_mode = insn_data[icode1].operand[0].mode;
8113 intermediate_type
8114 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8115 interm_optab
8116 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8117 optab_default);
8118 if (!interm_optab
8119 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8120 || insn_data[icode1].operand[0].mode != intermediate_mode
8121 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8122 == CODE_FOR_nothing))
8123 break;
8124
8125 interm_types->quick_push (intermediate_type);
8126 (*multi_step_cvt)++;
8127
8128 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8129 return true;
8130
8131 prev_mode = intermediate_mode;
8132 optab1 = interm_optab;
8133 }
8134
8135 interm_types->release ();
8136 return false;
8137 }
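/* Worked example (illustrative; target-dependent): a single-step narrowing
   such as int -> short is reported as *CODE1 = VEC_PACK_TRUNC_EXPR with
   *MULTI_STEP_CVT == 0, whereas int -> char needs one intermediate short
   step, so *MULTI_STEP_CVT == 1 and *INTERM_TYPES holds the short vector
   type.  For a multi-step unsigned FIX_TRUNC_EXPR the code above prefers the
   signed float-to-integer conversion when it yields the same result mode,
   since the unsigned variant is often more costly.  */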