1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "input.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "stor-layout.h"
33 #include "target.h"
34 #include "predict.h"
35 #include "hard-reg-set.h"
36 #include "function.h"
37 #include "dominance.h"
38 #include "cfg.h"
39 #include "basic-block.h"
40 #include "gimple-pretty-print.h"
41 #include "tree-ssa-alias.h"
42 #include "internal-fn.h"
43 #include "tree-eh.h"
44 #include "gimple-expr.h"
45 #include "is-a.h"
46 #include "gimple.h"
47 #include "gimplify.h"
48 #include "gimple-iterator.h"
49 #include "gimplify-me.h"
50 #include "gimple-ssa.h"
51 #include "tree-cfg.h"
52 #include "tree-phinodes.h"
53 #include "ssa-iterators.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-ssa-loop-manip.h"
57 #include "cfgloop.h"
58 #include "tree-ssa-loop.h"
59 #include "tree-scalar-evolution.h"
60 #include "rtl.h"
61 #include "flags.h"
62 #include "insn-config.h"
63 #include "expmed.h"
64 #include "dojump.h"
65 #include "explow.h"
66 #include "calls.h"
67 #include "emit-rtl.h"
68 #include "varasm.h"
69 #include "stmt.h"
70 #include "expr.h"
71 #include "recog.h" /* FIXME: for insn_data */
72 #include "insn-codes.h"
73 #include "optabs.h"
74 #include "diagnostic-core.h"
75 #include "tree-vectorizer.h"
76 #include "plugin-api.h"
77 #include "ipa-ref.h"
78 #include "cgraph.h"
79 #include "builtins.h"
80
81 /* For lang_hooks.types.type_for_mode. */
82 #include "langhooks.h"
83
84 /* Return the vectorized type for the given statement. */
85
86 tree
87 stmt_vectype (struct _stmt_vec_info *stmt_info)
88 {
89 return STMT_VINFO_VECTYPE (stmt_info);
90 }
91
92 /* Return TRUE iff the given statement is in an inner loop relative to
93 the loop being vectorized. */
94 bool
95 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
96 {
97 gimple stmt = STMT_VINFO_STMT (stmt_info);
98 basic_block bb = gimple_bb (stmt);
99 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
100 struct loop* loop;
101
102 if (!loop_vinfo)
103 return false;
104
105 loop = LOOP_VINFO_LOOP (loop_vinfo);
106
107 return (bb->loop_father == loop->inner);
108 }
109
110 /* Record the cost of a statement, either by directly informing the
111 target model or by saving it in a vector for later processing.
112 Return a preliminary estimate of the statement's cost. */
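/* For example (an illustrative sketch of the two paths below): passing a
   non-NULL BODY_COST_VEC records the statement in that vector and returns
   the estimate from builtin_vectorization_cost, whereas passing NULL hands
   the cost straight to the target via add_stmt_cost, using the loop's or
   basic block's target cost data.  */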
113
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 int misalign, enum vect_cost_model_location where)
118 {
119 if (body_cost_vec)
120 {
121 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
122 add_stmt_info_to_vec (body_cost_vec, count, kind,
123 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
124 misalign);
125 return (unsigned)
126 (builtin_vectorization_cost (kind, vectype, misalign) * count);
127
128 }
129 else
130 {
131 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
132 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
133 void *target_cost_data;
134
135 if (loop_vinfo)
136 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
137 else
138 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
139
140 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
141 misalign, where);
142 }
143 }
144
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
146
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
149 {
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
152 }
153
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT and the vector is associated
157 with scalar destination SCALAR_DEST. */
158
159 static tree
160 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
161 tree array, unsigned HOST_WIDE_INT n)
162 {
163 tree vect_type, vect, vect_name, array_ref;
164 gimple new_stmt;
165
166 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
167 vect_type = TREE_TYPE (TREE_TYPE (array));
168 vect = vect_create_destination_var (scalar_dest, vect_type);
169 array_ref = build4 (ARRAY_REF, vect_type, array,
170 build_int_cst (size_type_node, n),
171 NULL_TREE, NULL_TREE);
172
173 new_stmt = gimple_build_assign (vect, array_ref);
174 vect_name = make_ssa_name (vect, new_stmt);
175 gimple_assign_set_lhs (new_stmt, vect_name);
176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
177
178 return vect_name;
179 }
180
181 /* ARRAY is an array of vectors created by create_vector_array.
182 Emit code to store SSA_NAME VECT in index N of the array.
183 The store is part of the vectorization of STMT. */
184
185 static void
186 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
187 tree array, unsigned HOST_WIDE_INT n)
188 {
189 tree array_ref;
190 gimple new_stmt;
191
192 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
193 build_int_cst (size_type_node, n),
194 NULL_TREE, NULL_TREE);
195
196 new_stmt = gimple_build_assign (array_ref, vect);
197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
198 }
199
200 /* PTR is a pointer to an array of type TYPE. Return a representation
201 of *PTR. The memory reference replaces those in FIRST_DR
202 (and its group). */
203
204 static tree
205 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
206 {
207 tree mem_ref, alias_ptr_type;
208
209 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
210 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
211 /* Arrays have the same alignment as their type. */
212 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
213 return mem_ref;
214 }
215
216 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
217
218 /* Function vect_mark_relevant.
219
220 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
221
222 static void
223 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
224 enum vect_relevant relevant, bool live_p,
225 bool used_in_pattern)
226 {
227 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
228 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
229 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
230 gimple pattern_stmt;
231
232 if (dump_enabled_p ())
233 dump_printf_loc (MSG_NOTE, vect_location,
234 "mark relevant %d, live %d.\n", relevant, live_p);
235
 236   /* If this stmt is an original stmt in a pattern, we might need to mark its
 237      related pattern stmt instead of the original stmt.  However, such stmts
 238      may have their own uses that are not in any pattern; in such cases the
 239      stmt itself should be marked.  */
240 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
241 {
242 bool found = false;
243 if (!used_in_pattern)
244 {
245 imm_use_iterator imm_iter;
246 use_operand_p use_p;
247 gimple use_stmt;
248 tree lhs;
249 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
250 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
251
252 if (is_gimple_assign (stmt))
253 lhs = gimple_assign_lhs (stmt);
254 else
255 lhs = gimple_call_lhs (stmt);
256
 257 	  /* This use is an out-of-pattern use; if LHS has other uses that are
 258 	     pattern uses, we should mark the stmt itself, and not the pattern
 259 	     stmt.  */
260 if (lhs && TREE_CODE (lhs) == SSA_NAME)
261 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
262 {
263 if (is_gimple_debug (USE_STMT (use_p)))
264 continue;
265 use_stmt = USE_STMT (use_p);
266
267 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
268 continue;
269
270 if (vinfo_for_stmt (use_stmt)
271 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
272 {
273 found = true;
274 break;
275 }
276 }
277 }
278
279 if (!found)
280 {
281 /* This is the last stmt in a sequence that was detected as a
282 pattern that can potentially be vectorized. Don't mark the stmt
283 as relevant/live because it's not going to be vectorized.
284 Instead mark the pattern-stmt that replaces it. */
285
286 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
287
288 if (dump_enabled_p ())
289 dump_printf_loc (MSG_NOTE, vect_location,
290 "last stmt in pattern. don't mark"
291 " relevant/live.\n");
292 stmt_info = vinfo_for_stmt (pattern_stmt);
293 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
294 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
295 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
296 stmt = pattern_stmt;
297 }
298 }
299
300 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
301 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
302 STMT_VINFO_RELEVANT (stmt_info) = relevant;
303
304 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
305 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
306 {
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_NOTE, vect_location,
309 "already marked relevant/live.\n");
310 return;
311 }
312
313 worklist->safe_push (stmt);
314 }
315
316
317 /* Function vect_stmt_relevant_p.
318
319 Return true if STMT in loop that is represented by LOOP_VINFO is
320 "relevant for vectorization".
321
322 A stmt is considered "relevant for vectorization" if:
323 - it has uses outside the loop.
324 - it has vdefs (it alters memory).
 325    - it is a control stmt in the loop (except for the exit condition).
326
327 CHECKME: what other side effects would the vectorizer allow? */
328
329 static bool
330 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
331 enum vect_relevant *relevant, bool *live_p)
332 {
333 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
334 ssa_op_iter op_iter;
335 imm_use_iterator imm_iter;
336 use_operand_p use_p;
337 def_operand_p def_p;
338
339 *relevant = vect_unused_in_scope;
340 *live_p = false;
341
342 /* cond stmt other than loop exit cond. */
343 if (is_ctrl_stmt (stmt)
344 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
345 != loop_exit_ctrl_vec_info_type)
346 *relevant = vect_used_in_scope;
347
348 /* changing memory. */
349 if (gimple_code (stmt) != GIMPLE_PHI)
350 if (gimple_vdef (stmt)
351 && !gimple_clobber_p (stmt))
352 {
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt has vdefs.\n");
356 *relevant = vect_used_in_scope;
357 }
358
359 /* uses outside the loop. */
360 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
361 {
362 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
363 {
364 basic_block bb = gimple_bb (USE_STMT (use_p));
365 if (!flow_bb_inside_loop_p (loop, bb))
366 {
367 if (dump_enabled_p ())
368 dump_printf_loc (MSG_NOTE, vect_location,
369 "vec_stmt_relevant_p: used out of loop.\n");
370
371 if (is_gimple_debug (USE_STMT (use_p)))
372 continue;
373
 374 	      /* We expect all such uses to be in the loop exit phis
 375 		 (because of loop-closed SSA form).  */
376 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
377 gcc_assert (bb == single_exit (loop)->dest);
378
379 *live_p = true;
380 }
381 }
382 }
383
384 return (*live_p || *relevant);
385 }
386
387
388 /* Function exist_non_indexing_operands_for_use_p
389
390 USE is one of the uses attached to STMT. Check if USE is
391 used in STMT for anything other than indexing an array. */
392
393 static bool
394 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
395 {
396 tree operand;
397 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
398
399 /* USE corresponds to some operand in STMT. If there is no data
400 reference in STMT, then any operand that corresponds to USE
401 is not indexing an array. */
402 if (!STMT_VINFO_DATA_REF (stmt_info))
403 return true;
404
 405   /* STMT has a data_ref.  FORNOW this means that it has one of
 406      the following forms:
407 -1- ARRAY_REF = var
408 -2- var = ARRAY_REF
409 (This should have been verified in analyze_data_refs).
410
411 'var' in the second case corresponds to a def, not a use,
412 so USE cannot correspond to any operands that are not used
413 for array indexing.
414
415 Therefore, all we need to check is if STMT falls into the
416 first case, and whether var corresponds to USE. */
417
418 if (!gimple_assign_copy_p (stmt))
419 {
420 if (is_gimple_call (stmt)
421 && gimple_call_internal_p (stmt))
422 switch (gimple_call_internal_fn (stmt))
423 {
424 case IFN_MASK_STORE:
425 operand = gimple_call_arg (stmt, 3);
426 if (operand == use)
427 return true;
428 /* FALLTHRU */
429 case IFN_MASK_LOAD:
430 operand = gimple_call_arg (stmt, 2);
431 if (operand == use)
432 return true;
433 break;
434 default:
435 break;
436 }
437 return false;
438 }
439
440 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
441 return false;
442 operand = gimple_assign_rhs1 (stmt);
443 if (TREE_CODE (operand) != SSA_NAME)
444 return false;
445
446 if (operand == use)
447 return true;
448
449 return false;
450 }
451
452
453 /*
454 Function process_use.
455
456 Inputs:
457 - a USE in STMT in a loop represented by LOOP_VINFO
458 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
459 that defined USE. This is done by calling mark_relevant and passing it
460 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
461 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
462 be performed.
463
464 Outputs:
465 Generally, LIVE_P and RELEVANT are used to define the liveness and
466 relevance info of the DEF_STMT of this USE:
467 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
468 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
469 Exceptions:
 470    - case 1: If USE is used only for address computations (e.g. array indexing),
 471    which do not need to be directly vectorized, then the liveness/relevance
 472    of the respective DEF_STMT is left unchanged.
 473    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 474    skip DEF_STMT because it has already been processed.
475 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
476 be modified accordingly.
477
478 Return true if everything is as expected. Return false otherwise. */
479
480 static bool
481 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
482 enum vect_relevant relevant, vec<gimple> *worklist,
483 bool force)
484 {
485 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
486 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
487 stmt_vec_info dstmt_vinfo;
488 basic_block bb, def_bb;
489 tree def;
490 gimple def_stmt;
491 enum vect_def_type dt;
492
493 /* case 1: we are only interested in uses that need to be vectorized. Uses
494 that are used for address computation are not considered relevant. */
495 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
496 return true;
497
498 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
499 {
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
502 "not vectorized: unsupported use in stmt.\n");
503 return false;
504 }
505
506 if (!def_stmt || gimple_nop_p (def_stmt))
507 return true;
508
509 def_bb = gimple_bb (def_stmt);
510 if (!flow_bb_inside_loop_p (loop, def_bb))
511 {
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
514 return true;
515 }
516
517 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
518 DEF_STMT must have already been processed, because this should be the
519 only way that STMT, which is a reduction-phi, was put in the worklist,
520 as there should be no other uses for DEF_STMT in the loop. So we just
521 check that everything is as expected, and we are done. */
522 dstmt_vinfo = vinfo_for_stmt (def_stmt);
523 bb = gimple_bb (stmt);
524 if (gimple_code (stmt) == GIMPLE_PHI
525 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
526 && gimple_code (def_stmt) != GIMPLE_PHI
527 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
528 && bb->loop_father == def_bb->loop_father)
529 {
530 if (dump_enabled_p ())
531 dump_printf_loc (MSG_NOTE, vect_location,
532 "reduc-stmt defining reduc-phi in the same nest.\n");
533 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
534 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
535 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
536 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
537 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
538 return true;
539 }
540
541 /* case 3a: outer-loop stmt defining an inner-loop stmt:
542 outer-loop-header-bb:
543 d = def_stmt
544 inner-loop:
545 stmt # use (d)
546 outer-loop-tail-bb:
547 ... */
548 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
549 {
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE, vect_location,
552 "outer-loop def-stmt defining inner-loop stmt.\n");
553
554 switch (relevant)
555 {
556 case vect_unused_in_scope:
557 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
558 vect_used_in_scope : vect_unused_in_scope;
559 break;
560
561 case vect_used_in_outer_by_reduction:
562 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
563 relevant = vect_used_by_reduction;
564 break;
565
566 case vect_used_in_outer:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_in_scope;
569 break;
570
571 case vect_used_in_scope:
572 break;
573
574 default:
575 gcc_unreachable ();
576 }
577 }
578
579 /* case 3b: inner-loop stmt defining an outer-loop stmt:
580 outer-loop-header-bb:
581 ...
582 inner-loop:
583 d = def_stmt
584 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
585 stmt # use (d) */
586 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
587 {
588 if (dump_enabled_p ())
589 dump_printf_loc (MSG_NOTE, vect_location,
590 "inner-loop def-stmt defining outer-loop stmt.\n");
591
592 switch (relevant)
593 {
594 case vect_unused_in_scope:
595 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
596 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
597 vect_used_in_outer_by_reduction : vect_unused_in_scope;
598 break;
599
600 case vect_used_by_reduction:
601 relevant = vect_used_in_outer_by_reduction;
602 break;
603
604 case vect_used_in_scope:
605 relevant = vect_used_in_outer;
606 break;
607
608 default:
609 gcc_unreachable ();
610 }
611 }
612
613 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
614 is_pattern_stmt_p (stmt_vinfo));
615 return true;
616 }
617
618
619 /* Function vect_mark_stmts_to_be_vectorized.
620
621 Not all stmts in the loop need to be vectorized. For example:
622
623 for i...
624 for j...
625 1. T0 = i + j
626 2. T1 = a[T0]
627
628 3. j = j + 1
629
 630    Stmts 1 and 3 do not need to be vectorized, because loop control and
631 addressing of vectorized data-refs are handled differently.
632
633 This pass detects such stmts. */
634
635 bool
636 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
637 {
638 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
639 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
640 unsigned int nbbs = loop->num_nodes;
641 gimple_stmt_iterator si;
642 gimple stmt;
643 unsigned int i;
644 stmt_vec_info stmt_vinfo;
645 basic_block bb;
646 gimple phi;
647 bool live_p;
648 enum vect_relevant relevant, tmp_relevant;
649 enum vect_def_type def_type;
650
651 if (dump_enabled_p ())
652 dump_printf_loc (MSG_NOTE, vect_location,
653 "=== vect_mark_stmts_to_be_vectorized ===\n");
654
655 auto_vec<gimple, 64> worklist;
656
657 /* 1. Init worklist. */
658 for (i = 0; i < nbbs; i++)
659 {
660 bb = bbs[i];
661 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 {
663 phi = gsi_stmt (si);
664 if (dump_enabled_p ())
665 {
666 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
667 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
668 }
669
670 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
671 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
672 }
673 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
674 {
675 stmt = gsi_stmt (si);
676 if (dump_enabled_p ())
677 {
678 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
680 }
681
682 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
683 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
684 }
685 }
686
687 /* 2. Process_worklist */
688 while (worklist.length () > 0)
689 {
690 use_operand_p use_p;
691 ssa_op_iter iter;
692
693 stmt = worklist.pop ();
694 if (dump_enabled_p ())
695 {
696 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
697 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
698 }
699
700 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
701 (DEF_STMT) as relevant/irrelevant and live/dead according to the
702 liveness and relevance properties of STMT. */
703 stmt_vinfo = vinfo_for_stmt (stmt);
704 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
705 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
706
707 /* Generally, the liveness and relevance properties of STMT are
708 propagated as is to the DEF_STMTs of its USEs:
709 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
710 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
711
712 One exception is when STMT has been identified as defining a reduction
713 variable; in this case we set the liveness/relevance as follows:
714 live_p = false
715 relevant = vect_used_by_reduction
716 This is because we distinguish between two kinds of relevant stmts -
717 those that are used by a reduction computation, and those that are
718 (also) used by a regular computation. This allows us later on to
719 identify stmts that are used solely by a reduction, and therefore the
720 order of the results that they produce does not have to be kept. */
721
722 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
723 tmp_relevant = relevant;
724 switch (def_type)
725 {
726 case vect_reduction_def:
727 switch (tmp_relevant)
728 {
729 case vect_unused_in_scope:
730 relevant = vect_used_by_reduction;
731 break;
732
733 case vect_used_by_reduction:
734 if (gimple_code (stmt) == GIMPLE_PHI)
735 break;
736 /* fall through */
737
738 default:
739 if (dump_enabled_p ())
740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
741 "unsupported use of reduction.\n");
742 return false;
743 }
744
745 live_p = false;
746 break;
747
748 case vect_nested_cycle:
749 if (tmp_relevant != vect_unused_in_scope
750 && tmp_relevant != vect_used_in_outer_by_reduction
751 && tmp_relevant != vect_used_in_outer)
752 {
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
755 "unsupported use of nested cycle.\n");
756
757 return false;
758 }
759
760 live_p = false;
761 break;
762
763 case vect_double_reduction_def:
764 if (tmp_relevant != vect_unused_in_scope
765 && tmp_relevant != vect_used_by_reduction)
766 {
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
769 "unsupported use of double reduction.\n");
770
771 return false;
772 }
773
774 live_p = false;
775 break;
776
777 default:
778 break;
779 }
780
781 if (is_pattern_stmt_p (stmt_vinfo))
782 {
783 /* Pattern statements are not inserted into the code, so
784 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
785 have to scan the RHS or function arguments instead. */
786 if (is_gimple_assign (stmt))
787 {
788 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
789 tree op = gimple_assign_rhs1 (stmt);
790
791 i = 1;
792 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
793 {
794 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
795 live_p, relevant, &worklist, false)
796 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
797 live_p, relevant, &worklist, false))
798 return false;
799 i = 2;
800 }
801 for (; i < gimple_num_ops (stmt); i++)
802 {
803 op = gimple_op (stmt, i);
804 if (TREE_CODE (op) == SSA_NAME
805 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
806 &worklist, false))
807 return false;
808 }
809 }
810 else if (is_gimple_call (stmt))
811 {
812 for (i = 0; i < gimple_call_num_args (stmt); i++)
813 {
814 tree arg = gimple_call_arg (stmt, i);
815 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
816 &worklist, false))
817 return false;
818 }
819 }
820 }
821 else
822 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
823 {
824 tree op = USE_FROM_PTR (use_p);
825 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
826 &worklist, false))
827 return false;
828 }
829
830 if (STMT_VINFO_GATHER_P (stmt_vinfo))
831 {
832 tree off;
833 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
834 gcc_assert (decl);
835 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
836 &worklist, true))
837 return false;
838 }
839 } /* while worklist */
840
841 return true;
842 }
843
844
845 /* Function vect_model_simple_cost.
846
847 Models cost for simple operations, i.e. those that only emit ncopies of a
848 single op. Right now, this does not account for multiple insns that could
849 be generated for the single vector op. We will handle that shortly. */
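/* A worked example of the accounting below (numbers are illustrative):
   with NCOPIES == 2 and one operand that is vect_constant_def, one
   vector_stmt is recorded in the prologue and two vector_stmt copies in
   the loop body; the actual weights come from the target cost model.  */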
850
851 void
852 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
853 enum vect_def_type *dt,
854 stmt_vector_for_cost *prologue_cost_vec,
855 stmt_vector_for_cost *body_cost_vec)
856 {
857 int i;
858 int inside_cost = 0, prologue_cost = 0;
859
860 /* The SLP costs were already calculated during SLP tree build. */
861 if (PURE_SLP_STMT (stmt_info))
862 return;
863
 864   /* FORNOW: Assuming maximum 2 args per stmt.  */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
869
870 /* Pass the inside-of-loop statements to the target-specific cost model. */
871 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
872 stmt_info, 0, vect_body);
873
874 if (dump_enabled_p ())
875 dump_printf_loc (MSG_NOTE, vect_location,
876 "vect_model_simple_cost: inside_cost = %d, "
877 "prologue_cost = %d .\n", inside_cost, prologue_cost);
878 }
879
880
881 /* Model cost for type demotion and promotion operations. PWR is normally
882 zero for single-step promotions and demotions. It will be one if
883 two-step promotion/demotion is required, and so on. Each additional
884 step doubles the number of instructions required. */
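/* A worked example of the loop below (assuming vect_pow2 (N) == 2**N):
   a two-step promotion (PWR == 1) is charged
   vect_pow2 (1) + vect_pow2 (2) == 2 + 4 vec_promote_demote operations,
   while a two-step demotion is charged vect_pow2 (0) + vect_pow2 (1)
   == 1 + 2 of them.  */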
885
886 static void
887 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
888 enum vect_def_type *dt, int pwr)
889 {
890 int i, tmp;
891 int inside_cost = 0, prologue_cost = 0;
892 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
893 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
894 void *target_cost_data;
895
896 /* The SLP costs were already calculated during SLP tree build. */
897 if (PURE_SLP_STMT (stmt_info))
898 return;
899
900 if (loop_vinfo)
901 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
902 else
903 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
904
905 for (i = 0; i < pwr + 1; i++)
906 {
907 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
908 (i + 1) : i;
909 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
910 vec_promote_demote, stmt_info, 0,
911 vect_body);
912 }
913
 914   /* FORNOW: Assuming maximum 2 args per stmt.  */
915 for (i = 0; i < 2; i++)
916 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
917 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
918 stmt_info, 0, vect_prologue);
919
920 if (dump_enabled_p ())
921 dump_printf_loc (MSG_NOTE, vect_location,
922 "vect_model_promotion_demotion_cost: inside_cost = %d, "
923 "prologue_cost = %d .\n", inside_cost, prologue_cost);
924 }
925
926 /* Function vect_cost_group_size
927
928 For grouped load or store, return the group_size only if it is the first
929 load or store of a group, else return 1. This ensures that group size is
930 only returned once per group. */
931
932 static int
933 vect_cost_group_size (stmt_vec_info stmt_info)
934 {
935 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
936
937 if (first_stmt == STMT_VINFO_STMT (stmt_info))
938 return GROUP_SIZE (stmt_info);
939
940 return 1;
941 }
942
943
944 /* Function vect_model_store_cost
945
946 Models cost for stores. In the case of grouped accesses, one access
947 has the overhead of the grouped access attributed to it. */
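/* An illustrative example (not taken from any testcase): a non-strided
   grouped store with GROUP_SIZE == 4 and NCOPIES == 1, vectorized without
   store-lanes, is charged ceil_log2 (4) * 4 == 8 vec_perm statements for
   the interleaving, in addition to the per-copy store cost added by
   vect_get_store_cost.  */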
948
949 void
950 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
951 bool store_lanes_p, enum vect_def_type dt,
952 slp_tree slp_node,
953 stmt_vector_for_cost *prologue_cost_vec,
954 stmt_vector_for_cost *body_cost_vec)
955 {
956 int group_size;
957 unsigned int inside_cost = 0, prologue_cost = 0;
958 struct data_reference *first_dr;
959 gimple first_stmt;
960
961 if (dt == vect_constant_def || dt == vect_external_def)
962 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
963 stmt_info, 0, vect_prologue);
964
965 /* Grouped access? */
966 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
967 {
968 if (slp_node)
969 {
970 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
971 group_size = 1;
972 }
973 else
974 {
975 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
976 group_size = vect_cost_group_size (stmt_info);
977 }
978
979 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
980 }
981 /* Not a grouped access. */
982 else
983 {
984 group_size = 1;
985 first_dr = STMT_VINFO_DATA_REF (stmt_info);
986 }
987
988 /* We assume that the cost of a single store-lanes instruction is
989 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
990 access is instead being provided by a permute-and-store operation,
991 include the cost of the permutes. */
992 if (!store_lanes_p && group_size > 1
993 && !STMT_VINFO_STRIDED_P (stmt_info))
994 {
 995       /* Uses high and low interleave or shuffle operations for each
 996 	 needed permute.  */
997 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
998 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
999 stmt_info, 0, vect_body);
1000
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: strided group_size = %d .\n",
1004 group_size);
1005 }
1006
1007 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1008 /* Costs of the stores. */
1009 if (STMT_VINFO_STRIDED_P (stmt_info)
1010 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1011 {
1012 /* N scalar stores plus extracting the elements. */
1013 inside_cost += record_stmt_cost (body_cost_vec,
1014 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1015 scalar_store, stmt_info, 0, vect_body);
1016 }
1017 else
1018 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1019
1020 if (STMT_VINFO_STRIDED_P (stmt_info))
1021 inside_cost += record_stmt_cost (body_cost_vec,
1022 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1023 vec_to_scalar, stmt_info, 0, vect_body);
1024
1025 if (dump_enabled_p ())
1026 dump_printf_loc (MSG_NOTE, vect_location,
1027 "vect_model_store_cost: inside_cost = %d, "
1028 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1029 }
1030
1031
1032 /* Calculate cost of DR's memory access. */
1033 void
1034 vect_get_store_cost (struct data_reference *dr, int ncopies,
1035 unsigned int *inside_cost,
1036 stmt_vector_for_cost *body_cost_vec)
1037 {
1038 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1039 gimple stmt = DR_STMT (dr);
1040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1041
1042 switch (alignment_support_scheme)
1043 {
1044 case dr_aligned:
1045 {
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vector_store, stmt_info, 0,
1048 vect_body);
1049
1050 if (dump_enabled_p ())
1051 dump_printf_loc (MSG_NOTE, vect_location,
1052 "vect_model_store_cost: aligned.\n");
1053 break;
1054 }
1055
1056 case dr_unaligned_supported:
1057 {
1058 /* Here, we assign an additional cost for the unaligned store. */
1059 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1060 unaligned_store, stmt_info,
1061 DR_MISALIGNMENT (dr), vect_body);
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_NOTE, vect_location,
1064 "vect_model_store_cost: unaligned supported by "
1065 "hardware.\n");
1066 break;
1067 }
1068
1069 case dr_unaligned_unsupported:
1070 {
1071 *inside_cost = VECT_MAX_COST;
1072
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1075 "vect_model_store_cost: unsupported access.\n");
1076 break;
1077 }
1078
1079 default:
1080 gcc_unreachable ();
1081 }
1082 }
1083
1084
1085 /* Function vect_model_load_cost
1086
1087 Models cost for loads. In the case of grouped accesses, the last access
1088 has the overhead of the grouped access attributed to it. Since unaligned
1089 accesses are supported for loads, we also account for the costs of the
1090 access scheme chosen. */
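/* An illustrative example (the 4-element vector is an arbitrary choice):
   a strided, non-grouped load with NCOPIES == 1 and a 4-element vectype
   is charged 4 scalar_load operations plus one vec_construct in the loop
   body, whereas a non-strided grouped load without load-lanes is charged
   the vec_perm statements of the load-and-permute scheme.  */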
1091
1092 void
1093 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1094 bool load_lanes_p, slp_tree slp_node,
1095 stmt_vector_for_cost *prologue_cost_vec,
1096 stmt_vector_for_cost *body_cost_vec)
1097 {
1098 int group_size;
1099 gimple first_stmt;
1100 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1101 unsigned int inside_cost = 0, prologue_cost = 0;
1102
1103 /* Grouped accesses? */
1104 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1105 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1106 {
1107 group_size = vect_cost_group_size (stmt_info);
1108 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1109 }
1110 /* Not a grouped access. */
1111 else
1112 {
1113 group_size = 1;
1114 first_dr = dr;
1115 }
1116
1117 /* We assume that the cost of a single load-lanes instruction is
1118 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1119 access is instead being provided by a load-and-permute operation,
1120 include the cost of the permutes. */
1121 if (!load_lanes_p && group_size > 1
1122 && !STMT_VINFO_STRIDED_P (stmt_info))
1123 {
 1124       /* Uses even and odd extract operations or shuffle operations
 1125 	 for each needed permute.  */
1126 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1127 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1128 stmt_info, 0, vect_body);
1129
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE, vect_location,
1132 "vect_model_load_cost: strided group_size = %d .\n",
1133 group_size);
1134 }
1135
1136 /* The loads themselves. */
1137 if (STMT_VINFO_STRIDED_P (stmt_info)
1138 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1139 {
1140 /* N scalar loads plus gathering them into a vector. */
1141 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1142 inside_cost += record_stmt_cost (body_cost_vec,
1143 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1144 scalar_load, stmt_info, 0, vect_body);
1145 }
1146 else
1147 vect_get_load_cost (first_dr, ncopies,
1148 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1149 || group_size > 1 || slp_node),
1150 &inside_cost, &prologue_cost,
1151 prologue_cost_vec, body_cost_vec, true);
1152 if (STMT_VINFO_STRIDED_P (stmt_info))
1153 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1154 stmt_info, 0, vect_body);
1155
1156 if (dump_enabled_p ())
1157 dump_printf_loc (MSG_NOTE, vect_location,
1158 "vect_model_load_cost: inside_cost = %d, "
1159 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1160 }
1161
1162
1163 /* Calculate cost of DR's memory access. */
1164 void
1165 vect_get_load_cost (struct data_reference *dr, int ncopies,
1166 bool add_realign_cost, unsigned int *inside_cost,
1167 unsigned int *prologue_cost,
1168 stmt_vector_for_cost *prologue_cost_vec,
1169 stmt_vector_for_cost *body_cost_vec,
1170 bool record_prologue_costs)
1171 {
1172 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1173 gimple stmt = DR_STMT (dr);
1174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1175
1176 switch (alignment_support_scheme)
1177 {
1178 case dr_aligned:
1179 {
1180 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1181 stmt_info, 0, vect_body);
1182
1183 if (dump_enabled_p ())
1184 dump_printf_loc (MSG_NOTE, vect_location,
1185 "vect_model_load_cost: aligned.\n");
1186
1187 break;
1188 }
1189 case dr_unaligned_supported:
1190 {
1191 /* Here, we assign an additional cost for the unaligned load. */
1192 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1193 unaligned_load, stmt_info,
1194 DR_MISALIGNMENT (dr), vect_body);
1195
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "vect_model_load_cost: unaligned supported by "
1199 "hardware.\n");
1200
1201 break;
1202 }
1203 case dr_explicit_realign:
1204 {
1205 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1206 vector_load, stmt_info, 0, vect_body);
1207 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1208 vec_perm, stmt_info, 0, vect_body);
1209
1210 /* FIXME: If the misalignment remains fixed across the iterations of
1211 the containing loop, the following cost should be added to the
1212 prologue costs. */
1213 if (targetm.vectorize.builtin_mask_for_load)
1214 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1215 stmt_info, 0, vect_body);
1216
1217 if (dump_enabled_p ())
1218 dump_printf_loc (MSG_NOTE, vect_location,
1219 "vect_model_load_cost: explicit realign\n");
1220
1221 break;
1222 }
1223 case dr_explicit_realign_optimized:
1224 {
1225 if (dump_enabled_p ())
1226 dump_printf_loc (MSG_NOTE, vect_location,
1227 "vect_model_load_cost: unaligned software "
1228 "pipelined.\n");
1229
1230 /* Unaligned software pipeline has a load of an address, an initial
1231 load, and possibly a mask operation to "prime" the loop. However,
1232 if this is an access in a group of loads, which provide grouped
1233 access, then the above cost should only be considered for one
1234 access in the group. Inside the loop, there is a load op
1235 and a realignment op. */
1236
1237 if (add_realign_cost && record_prologue_costs)
1238 {
1239 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1240 vector_stmt, stmt_info,
1241 0, vect_prologue);
1242 if (targetm.vectorize.builtin_mask_for_load)
1243 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1244 vector_stmt, stmt_info,
1245 0, vect_prologue);
1246 }
1247
1248 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1249 stmt_info, 0, vect_body);
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1251 stmt_info, 0, vect_body);
1252
1253 if (dump_enabled_p ())
1254 dump_printf_loc (MSG_NOTE, vect_location,
1255 "vect_model_load_cost: explicit realign optimized"
1256 "\n");
1257
1258 break;
1259 }
1260
1261 case dr_unaligned_unsupported:
1262 {
1263 *inside_cost = VECT_MAX_COST;
1264
1265 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1267 "vect_model_load_cost: unsupported access.\n");
1268 break;
1269 }
1270
1271 default:
1272 gcc_unreachable ();
1273 }
1274 }
1275
1276 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1277 the loop preheader for the vectorized stmt STMT. */
1278
1279 static void
1280 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1281 {
1282 if (gsi)
1283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1284 else
1285 {
1286 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1287 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1288
1289 if (loop_vinfo)
1290 {
1291 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1292 basic_block new_bb;
1293 edge pe;
1294
1295 if (nested_in_vect_loop_p (loop, stmt))
1296 loop = loop->inner;
1297
1298 pe = loop_preheader_edge (loop);
1299 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1300 gcc_assert (!new_bb);
1301 }
1302 else
1303 {
1304 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1305 basic_block bb;
1306 gimple_stmt_iterator gsi_bb_start;
1307
1308 gcc_assert (bb_vinfo);
1309 bb = BB_VINFO_BB (bb_vinfo);
1310 gsi_bb_start = gsi_after_labels (bb);
1311 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1312 }
1313 }
1314
1315 if (dump_enabled_p ())
1316 {
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "created new init_stmt: ");
1319 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1320 }
1321 }
1322
1323 /* Function vect_init_vector.
1324
 1325    Insert a new stmt (INIT_STMT) that initializes a new variable of type
 1326    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
 1327    a vector type, a vector with all elements equal to VAL is created first.
 1328    Place the initialization at GSI if it is not NULL.  Otherwise, place the
 1329    initialization at the loop preheader.
1330 Return the DEF of INIT_STMT.
1331 It will be used in the vectorization of STMT. */
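/* For example (a sketch; the SSA name shown is made up): initializing a
   4-element integer vector type from the scalar constant 5 emits
   something like

       vect_cst__1 = { 5, 5, 5, 5 };

   in the loop preheader (or at GSI, if given) and returns vect_cst__1.  */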
1332
1333 tree
1334 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1335 {
1336 tree new_var;
1337 gimple init_stmt;
1338 tree vec_oprnd;
1339 tree new_temp;
1340
1341 if (TREE_CODE (type) == VECTOR_TYPE
1342 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1343 {
1344 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1345 {
1346 if (CONSTANT_CLASS_P (val))
1347 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1348 else
1349 {
1350 new_temp = make_ssa_name (TREE_TYPE (type));
1351 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1352 vect_init_vector_1 (stmt, init_stmt, gsi);
1353 val = new_temp;
1354 }
1355 }
1356 val = build_vector_from_val (type, val);
1357 }
1358
1359 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1360 init_stmt = gimple_build_assign (new_var, val);
1361 new_temp = make_ssa_name (new_var, init_stmt);
1362 gimple_assign_set_lhs (init_stmt, new_temp);
1363 vect_init_vector_1 (stmt, init_stmt, gsi);
1364 vec_oprnd = gimple_assign_lhs (init_stmt);
1365 return vec_oprnd;
1366 }
1367
1368
1369 /* Function vect_get_vec_def_for_operand.
1370
1371 OP is an operand in STMT. This function returns a (vector) def that will be
1372 used in the vectorized stmt for STMT.
1373
1374 In the case that OP is an SSA_NAME which is defined in the loop, then
1375 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1376
1377 In case OP is an invariant or constant, a new stmt that creates a vector def
1378 needs to be introduced. */
1379
1380 tree
1381 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1382 {
1383 tree vec_oprnd;
1384 gimple vec_stmt;
1385 gimple def_stmt;
1386 stmt_vec_info def_stmt_info = NULL;
1387 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1388 unsigned int nunits;
1389 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1390 tree def;
1391 enum vect_def_type dt;
1392 bool is_simple_use;
1393 tree vector_type;
1394
1395 if (dump_enabled_p ())
1396 {
1397 dump_printf_loc (MSG_NOTE, vect_location,
1398 "vect_get_vec_def_for_operand: ");
1399 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1400 dump_printf (MSG_NOTE, "\n");
1401 }
1402
1403 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1404 &def_stmt, &def, &dt);
1405 gcc_assert (is_simple_use);
1406 if (dump_enabled_p ())
1407 {
1408 int loc_printed = 0;
1409 if (def)
1410 {
1411 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1412 loc_printed = 1;
1413 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1414 dump_printf (MSG_NOTE, "\n");
1415 }
1416 if (def_stmt)
1417 {
1418 if (loc_printed)
1419 dump_printf (MSG_NOTE, " def_stmt = ");
1420 else
1421 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1422 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1423 }
1424 }
1425
1426 switch (dt)
1427 {
1428 /* Case 1: operand is a constant. */
1429 case vect_constant_def:
1430 {
1431 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1432 gcc_assert (vector_type);
1433 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1434
1435 if (scalar_def)
1436 *scalar_def = op;
1437
1438 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE, vect_location,
1441 "Create vector_cst. nunits = %d\n", nunits);
1442
1443 return vect_init_vector (stmt, op, vector_type, NULL);
1444 }
1445
1446 /* Case 2: operand is defined outside the loop - loop invariant. */
1447 case vect_external_def:
1448 {
1449 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1450 gcc_assert (vector_type);
1451
1452 if (scalar_def)
1453 *scalar_def = def;
1454
1455 /* Create 'vec_inv = {inv,inv,..,inv}' */
1456 if (dump_enabled_p ())
1457 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1458
1459 return vect_init_vector (stmt, def, vector_type, NULL);
1460 }
1461
1462 /* Case 3: operand is defined inside the loop. */
1463 case vect_internal_def:
1464 {
1465 if (scalar_def)
1466 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1467
1468 /* Get the def from the vectorized stmt. */
1469 def_stmt_info = vinfo_for_stmt (def_stmt);
1470
1471 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1472 /* Get vectorized pattern statement. */
1473 if (!vec_stmt
1474 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1475 && !STMT_VINFO_RELEVANT (def_stmt_info))
1476 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1477 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1478 gcc_assert (vec_stmt);
1479 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1480 vec_oprnd = PHI_RESULT (vec_stmt);
1481 else if (is_gimple_call (vec_stmt))
1482 vec_oprnd = gimple_call_lhs (vec_stmt);
1483 else
1484 vec_oprnd = gimple_assign_lhs (vec_stmt);
1485 return vec_oprnd;
1486 }
1487
1488 /* Case 4: operand is defined by a loop header phi - reduction */
1489 case vect_reduction_def:
1490 case vect_double_reduction_def:
1491 case vect_nested_cycle:
1492 {
1493 struct loop *loop;
1494
1495 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1496 loop = (gimple_bb (def_stmt))->loop_father;
1497
1498 /* Get the def before the loop */
1499 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1500 return get_initial_def_for_reduction (stmt, op, scalar_def);
1501 }
1502
1503 /* Case 5: operand is defined by loop-header phi - induction. */
1504 case vect_induction_def:
1505 {
1506 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1507
1508 /* Get the def from the vectorized stmt. */
1509 def_stmt_info = vinfo_for_stmt (def_stmt);
1510 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1511 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1512 vec_oprnd = PHI_RESULT (vec_stmt);
1513 else
1514 vec_oprnd = gimple_get_lhs (vec_stmt);
1515 return vec_oprnd;
1516 }
1517
1518 default:
1519 gcc_unreachable ();
1520 }
1521 }
1522
1523
1524 /* Function vect_get_vec_def_for_stmt_copy
1525
1526 Return a vector-def for an operand. This function is used when the
1527 vectorized stmt to be created (by the caller to this function) is a "copy"
1528 created in case the vectorized result cannot fit in one vector, and several
1529 copies of the vector-stmt are required. In this case the vector-def is
1530 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1531 of the stmt that defines VEC_OPRND.
1532 DT is the type of the vector def VEC_OPRND.
1533
1534 Context:
1535 In case the vectorization factor (VF) is bigger than the number
1536 of elements that can fit in a vectype (nunits), we have to generate
1537 more than one vector stmt to vectorize the scalar stmt. This situation
1538 arises when there are multiple data-types operated upon in the loop; the
1539 smallest data-type determines the VF, and as a result, when vectorizing
1540 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1541 vector stmt (each computing a vector of 'nunits' results, and together
1542 computing 'VF' results in each iteration). This function is called when
1543 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1544 which VF=16 and nunits=4, so the number of copies required is 4):
1545
1546 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1547
1548 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1549 VS1.1: vx.1 = memref1 VS1.2
1550 VS1.2: vx.2 = memref2 VS1.3
1551 VS1.3: vx.3 = memref3
1552
1553 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1554 VSnew.1: vz1 = vx.1 + ... VSnew.2
1555 VSnew.2: vz2 = vx.2 + ... VSnew.3
1556 VSnew.3: vz3 = vx.3 + ...
1557
1558 The vectorization of S1 is explained in vectorizable_load.
1559 The vectorization of S2:
1560 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1561 the function 'vect_get_vec_def_for_operand' is called to
1562 get the relevant vector-def for each operand of S2. For operand x it
1563 returns the vector-def 'vx.0'.
1564
1565 To create the remaining copies of the vector-stmt (VSnew.j), this
1566 function is called to get the relevant vector-def for each operand. It is
1567 obtained from the respective VS1.j stmt, which is recorded in the
1568 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1569
1570 For example, to obtain the vector-def 'vx.1' in order to create the
1571 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1572 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1573 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1574 and return its def ('vx.1').
1575 Overall, to create the above sequence this function will be called 3 times:
1576 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1577 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1578 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1579
1580 tree
1581 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1582 {
1583 gimple vec_stmt_for_operand;
1584 stmt_vec_info def_stmt_info;
1585
1586 /* Do nothing; can reuse same def. */
1587 if (dt == vect_external_def || dt == vect_constant_def )
1588 return vec_oprnd;
1589
1590 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1591 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1592 gcc_assert (def_stmt_info);
1593 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1594 gcc_assert (vec_stmt_for_operand);
1595 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1596 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1597 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1598 else
1599 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1600 return vec_oprnd;
1601 }
1602
1603
1604 /* Get vectorized definitions for the operands to create a copy of an original
1605 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1606
1607 static void
1608 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1609 vec<tree> *vec_oprnds0,
1610 vec<tree> *vec_oprnds1)
1611 {
1612 tree vec_oprnd = vec_oprnds0->pop ();
1613
1614 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1615 vec_oprnds0->quick_push (vec_oprnd);
1616
1617 if (vec_oprnds1 && vec_oprnds1->length ())
1618 {
1619 vec_oprnd = vec_oprnds1->pop ();
1620 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1621 vec_oprnds1->quick_push (vec_oprnd);
1622 }
1623 }
1624
1625
1626 /* Get vectorized definitions for OP0 and OP1.
 1627    REDUC_INDEX is the index of the reduction operand in case of reduction,
 1628    and -1 otherwise.  */
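/* Illustrative usage (a sketch, not quoted from any particular caller):
   for a binary operation a caller would typically do

       vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			  slp_node, -1);

   and use vect_get_vec_defs_for_stmt_copy to refresh the defs for each
   further copy of the vector statement.  */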
1629
1630 void
1631 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1632 vec<tree> *vec_oprnds0,
1633 vec<tree> *vec_oprnds1,
1634 slp_tree slp_node, int reduc_index)
1635 {
1636 if (slp_node)
1637 {
1638 int nops = (op1 == NULL_TREE) ? 1 : 2;
1639 auto_vec<tree> ops (nops);
1640 auto_vec<vec<tree> > vec_defs (nops);
1641
1642 ops.quick_push (op0);
1643 if (op1)
1644 ops.quick_push (op1);
1645
1646 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1647
1648 *vec_oprnds0 = vec_defs[0];
1649 if (op1)
1650 *vec_oprnds1 = vec_defs[1];
1651 }
1652 else
1653 {
1654 tree vec_oprnd;
1655
1656 vec_oprnds0->create (1);
1657 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1658 vec_oprnds0->quick_push (vec_oprnd);
1659
1660 if (op1)
1661 {
1662 vec_oprnds1->create (1);
1663 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1664 vec_oprnds1->quick_push (vec_oprnd);
1665 }
1666 }
1667 }
1668
1669
1670 /* Function vect_finish_stmt_generation.
1671
1672 Insert a new stmt. */
1673
1674 void
1675 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1676 gimple_stmt_iterator *gsi)
1677 {
1678 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1679 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1680 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1681
1682 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1683
1684 if (!gsi_end_p (*gsi)
1685 && gimple_has_mem_ops (vec_stmt))
1686 {
1687 gimple at_stmt = gsi_stmt (*gsi);
1688 tree vuse = gimple_vuse (at_stmt);
1689 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1690 {
1691 tree vdef = gimple_vdef (at_stmt);
1692 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1693 /* If we have an SSA vuse and insert a store, update virtual
1694 SSA form to avoid triggering the renamer. Do so only
1695 if we can easily see all uses - which is what almost always
1696 happens with the way vectorized stmts are inserted. */
1697 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1698 && ((is_gimple_assign (vec_stmt)
1699 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1700 || (is_gimple_call (vec_stmt)
1701 && !(gimple_call_flags (vec_stmt)
1702 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1703 {
1704 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1705 gimple_set_vdef (vec_stmt, new_vdef);
1706 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1707 }
1708 }
1709 }
1710 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1711
1712 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1713 bb_vinfo));
1714
1715 if (dump_enabled_p ())
1716 {
1717 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1718 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1719 }
1720
1721 gimple_set_location (vec_stmt, gimple_location (stmt));
1722
1723 /* While EH edges will generally prevent vectorization, stmt might
1724 e.g. be in a must-not-throw region. Ensure newly created stmts
1725 that could throw are part of the same region. */
1726 int lp_nr = lookup_stmt_eh_lp (stmt);
1727 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1728 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1729 }
1730
1731 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1732 a function declaration if the target has a vectorized version
1733 of the function, or NULL_TREE if the function cannot be vectorized. */
1734
1735 tree
1736 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1737 {
1738 tree fndecl = gimple_call_fndecl (call);
1739
1740 /* We only handle functions that do not read or clobber memory -- i.e.
1741 const or novops ones. */
1742 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1743 return NULL_TREE;
1744
1745 if (!fndecl
1746 || TREE_CODE (fndecl) != FUNCTION_DECL
1747 || !DECL_BUILT_IN (fndecl))
1748 return NULL_TREE;
1749
1750 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1751 vectype_in);
1752 }
1753
1754
1755 static tree permute_vec_elements (tree, tree, tree, gimple,
1756 gimple_stmt_iterator *);
1757
1758
1759 /* Function vectorizable_mask_load_store.
1760
1761 Check if STMT performs a conditional load or store that can be vectorized.
1762 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1763 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1764 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1765
1766 static bool
1767 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1768 gimple *vec_stmt, slp_tree slp_node)
1769 {
1770 tree vec_dest = NULL;
1771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1772 stmt_vec_info prev_stmt_info;
1773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1774 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1775 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1776 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1777 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1778 tree elem_type;
1779 gimple new_stmt;
1780 tree dummy;
1781 tree dataref_ptr = NULL_TREE;
1782 gimple ptr_incr;
1783 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1784 int ncopies;
1785 int i, j;
1786 bool inv_p;
1787 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1788 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1789 int gather_scale = 1;
1790 enum vect_def_type gather_dt = vect_unknown_def_type;
1791 bool is_store;
1792 tree mask;
1793 gimple def_stmt;
1794 tree def;
1795 enum vect_def_type dt;
1796
1797 if (slp_node != NULL)
1798 return false;
1799
1800 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1801 gcc_assert (ncopies >= 1);
1802
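/* The mask is the third argument of both IFN_MASK_LOAD and IFN_MASK_STORE;
   its scalar type must be an integer exactly as wide as the data elements
   so that it corresponds element-wise to a vector mask.  */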
1803 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1804 mask = gimple_call_arg (stmt, 2);
1805 if (TYPE_PRECISION (TREE_TYPE (mask))
1806 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1807 return false;
1808
1809 /* FORNOW. This restriction should be relaxed. */
1810 if (nested_in_vect_loop && ncopies > 1)
1811 {
1812 if (dump_enabled_p ())
1813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1814 "multiple types in nested loop.");
1815 return false;
1816 }
1817
1818 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1819 return false;
1820
1821 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1822 return false;
1823
1824 if (!STMT_VINFO_DATA_REF (stmt_info))
1825 return false;
1826
1827 elem_type = TREE_TYPE (vectype);
1828
1829 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1830 return false;
1831
1832 if (STMT_VINFO_STRIDED_P (stmt_info))
1833 return false;
1834
1835 if (STMT_VINFO_GATHER_P (stmt_info))
1836 {
1837 gimple def_stmt;
1838 tree def;
1839 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1840 &gather_off, &gather_scale);
1841 gcc_assert (gather_decl);
1842 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1843 &def_stmt, &def, &gather_dt,
1844 &gather_off_vectype))
1845 {
1846 if (dump_enabled_p ())
1847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1848 "gather index use not simple.");
1849 return false;
1850 }
1851
1852 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1853 tree masktype
1854 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1855 if (TREE_CODE (masktype) == INTEGER_TYPE)
1856 {
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1859 "masked gather with integer mask not supported.");
1860 return false;
1861 }
1862 }
1863 else if (tree_int_cst_compare (nested_in_vect_loop
1864 ? STMT_VINFO_DR_STEP (stmt_info)
1865 : DR_STEP (dr), size_zero_node) <= 0)
1866 return false;
1867 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1868 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1869 return false;
1870
1871 if (TREE_CODE (mask) != SSA_NAME)
1872 return false;
1873
1874 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1875 &def_stmt, &def, &dt))
1876 return false;
1877
1878 if (is_store)
1879 {
1880 tree rhs = gimple_call_arg (stmt, 3);
1881 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1882 &def_stmt, &def, &dt))
1883 return false;
1884 }
1885
1886 if (!vec_stmt) /* transformation not required. */
1887 {
1888 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1889 if (is_store)
1890 vect_model_store_cost (stmt_info, ncopies, false, dt,
1891 NULL, NULL, NULL);
1892 else
1893 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1894 return true;
1895 }
1896
1897 /** Transform. **/
1898
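/* Three forms are generated below: a masked gather through the target's
   gather builtin, a masked store through IFN_MASK_STORE, and a masked
   load through IFN_MASK_LOAD.  */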
1899 if (STMT_VINFO_GATHER_P (stmt_info))
1900 {
1901 tree vec_oprnd0 = NULL_TREE, op;
1902 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1903 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1904 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1905 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1906 tree mask_perm_mask = NULL_TREE;
1907 edge pe = loop_preheader_edge (loop);
1908 gimple_seq seq;
1909 basic_block new_bb;
1910 enum { NARROW, NONE, WIDEN } modifier;
1911 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1912
1913 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1914 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1918 scaletype = TREE_VALUE (arglist);
1919 gcc_checking_assert (types_compatible_p (srctype, rettype)
1920 && types_compatible_p (srctype, masktype));
1921
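/* The gather builtin may use a different number of offset elements than
   the data vector has lanes: with twice as many offsets we WIDEN (the
   second half of the offset vector is obtained by a permutation), with
   half as many we NARROW (two gathers per copy, their results glued
   together by a permutation).  */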
1922 if (nunits == gather_off_nunits)
1923 modifier = NONE;
1924 else if (nunits == gather_off_nunits / 2)
1925 {
1926 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1927 modifier = WIDEN;
1928
1929 for (i = 0; i < gather_off_nunits; ++i)
1930 sel[i] = i | nunits;
1931
1932 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1933 }
1934 else if (nunits == gather_off_nunits * 2)
1935 {
1936 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1937 modifier = NARROW;
1938
1939 for (i = 0; i < nunits; ++i)
1940 sel[i] = i < gather_off_nunits
1941 ? i : i + nunits - gather_off_nunits;
1942
1943 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1944 ncopies *= 2;
1945 for (i = 0; i < nunits; ++i)
1946 sel[i] = i | gather_off_nunits;
1947 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1948 }
1949 else
1950 gcc_unreachable ();
1951
1952 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1953
1954 ptr = fold_convert (ptrtype, gather_base);
1955 if (!is_gimple_min_invariant (ptr))
1956 {
1957 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1958 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1959 gcc_assert (!new_bb);
1960 }
1961
1962 scale = build_int_cst (scaletype, gather_scale);
1963
1964 prev_stmt_info = NULL;
1965 for (j = 0; j < ncopies; ++j)
1966 {
1967 if (modifier == WIDEN && (j & 1))
1968 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1969 perm_mask, stmt, gsi);
1970 else if (j == 0)
1971 op = vec_oprnd0
1972 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1973 else
1974 op = vec_oprnd0
1975 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1976
1977 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1978 {
1979 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1980 == TYPE_VECTOR_SUBPARTS (idxtype));
1981 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1982 var = make_ssa_name (var);
1983 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1984 new_stmt
1985 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1987 op = var;
1988 }
1989
1990 if (mask_perm_mask && (j & 1))
1991 mask_op = permute_vec_elements (mask_op, mask_op,
1992 mask_perm_mask, stmt, gsi);
1993 else
1994 {
1995 if (j == 0)
1996 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1997 else
1998 {
1999 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
2000 &def_stmt, &def, &dt);
2001 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2002 }
2003
2004 mask_op = vec_mask;
2005 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2006 {
2007 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2008 == TYPE_VECTOR_SUBPARTS (masktype));
2009 var = vect_get_new_vect_var (masktype, vect_simple_var,
2010 NULL);
2011 var = make_ssa_name (var);
2012 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2013 new_stmt
2014 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2016 mask_op = var;
2017 }
2018 }
2019
2020 new_stmt
2021 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2022 scale);
2023
2024 if (!useless_type_conversion_p (vectype, rettype))
2025 {
2026 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2027 == TYPE_VECTOR_SUBPARTS (rettype));
2028 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2029 op = make_ssa_name (var, new_stmt);
2030 gimple_call_set_lhs (new_stmt, op);
2031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2032 var = make_ssa_name (vec_dest);
2033 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2034 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2035 }
2036 else
2037 {
2038 var = make_ssa_name (vec_dest, new_stmt);
2039 gimple_call_set_lhs (new_stmt, var);
2040 }
2041
2042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2043
2044 if (modifier == NARROW)
2045 {
2046 if ((j & 1) == 0)
2047 {
2048 prev_res = var;
2049 continue;
2050 }
2051 var = permute_vec_elements (prev_res, var,
2052 perm_mask, stmt, gsi);
2053 new_stmt = SSA_NAME_DEF_STMT (var);
2054 }
2055
2056 if (prev_stmt_info == NULL)
2057 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2058 else
2059 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2060 prev_stmt_info = vinfo_for_stmt (new_stmt);
2061 }
2062
2063 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2064 from the IL. */
2065 tree lhs = gimple_call_lhs (stmt);
2066 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2067 set_vinfo_for_stmt (new_stmt, stmt_info);
2068 set_vinfo_for_stmt (stmt, NULL);
2069 STMT_VINFO_STMT (stmt_info) = new_stmt;
2070 gsi_replace (gsi, new_stmt, true);
2071 return true;
2072 }
2073 else if (is_store)
2074 {
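/* Masked store: emit one IFN_MASK_STORE per copy, creating the data-ref
   pointer for the first copy and bumping it by the vector size for each
   following copy.  */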
2075 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2076 prev_stmt_info = NULL;
2077 for (i = 0; i < ncopies; i++)
2078 {
2079 unsigned align, misalign;
2080
2081 if (i == 0)
2082 {
2083 tree rhs = gimple_call_arg (stmt, 3);
2084 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2085 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2086 /* We should have caught mismatched types earlier. */
2087 gcc_assert (useless_type_conversion_p (vectype,
2088 TREE_TYPE (vec_rhs)));
2089 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2090 NULL_TREE, &dummy, gsi,
2091 &ptr_incr, false, &inv_p);
2092 gcc_assert (!inv_p);
2093 }
2094 else
2095 {
2096 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2097 &def, &dt);
2098 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2099 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2100 &def, &dt);
2101 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2102 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2103 TYPE_SIZE_UNIT (vectype));
2104 }
2105
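/* Record the pointer alignment: the full vector alignment when the access
   is known to be aligned, the element alignment when the misalignment is
   unknown (DR_MISALIGNMENT == -1), and otherwise the known misalignment.  */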
2106 align = TYPE_ALIGN_UNIT (vectype);
2107 if (aligned_access_p (dr))
2108 misalign = 0;
2109 else if (DR_MISALIGNMENT (dr) == -1)
2110 {
2111 align = TYPE_ALIGN_UNIT (elem_type);
2112 misalign = 0;
2113 }
2114 else
2115 misalign = DR_MISALIGNMENT (dr);
2116 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2117 misalign);
2118 new_stmt
2119 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2120 gimple_call_arg (stmt, 1),
2121 vec_mask, vec_rhs);
2122 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2123 if (i == 0)
2124 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2125 else
2126 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2127 prev_stmt_info = vinfo_for_stmt (new_stmt);
2128 }
2129 }
2130 else
2131 {
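/* Masked load: same per-copy structure as the store case above, but each
   copy emits an IFN_MASK_LOAD whose result is assigned to a fresh SSA name
   based on VEC_DEST.  */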
2132 tree vec_mask = NULL_TREE;
2133 prev_stmt_info = NULL;
2134 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2135 for (i = 0; i < ncopies; i++)
2136 {
2137 unsigned align, misalign;
2138
2139 if (i == 0)
2140 {
2141 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2142 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2143 NULL_TREE, &dummy, gsi,
2144 &ptr_incr, false, &inv_p);
2145 gcc_assert (!inv_p);
2146 }
2147 else
2148 {
2149 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2150 &def, &dt);
2151 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2152 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2153 TYPE_SIZE_UNIT (vectype));
2154 }
2155
2156 align = TYPE_ALIGN_UNIT (vectype);
2157 if (aligned_access_p (dr))
2158 misalign = 0;
2159 else if (DR_MISALIGNMENT (dr) == -1)
2160 {
2161 align = TYPE_ALIGN_UNIT (elem_type);
2162 misalign = 0;
2163 }
2164 else
2165 misalign = DR_MISALIGNMENT (dr);
2166 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2167 misalign);
2168 new_stmt
2169 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2170 gimple_call_arg (stmt, 1),
2171 vec_mask);
2172 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2174 if (i == 0)
2175 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2176 else
2177 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2178 prev_stmt_info = vinfo_for_stmt (new_stmt);
2179 }
2180 }
2181
2182 if (!is_store)
2183 {
2184 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2185 from the IL. */
2186 tree lhs = gimple_call_lhs (stmt);
2187 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2188 set_vinfo_for_stmt (new_stmt, stmt_info);
2189 set_vinfo_for_stmt (stmt, NULL);
2190 STMT_VINFO_STMT (stmt_info) = new_stmt;
2191 gsi_replace (gsi, new_stmt, true);
2192 }
2193
2194 return true;
2195 }
2196
2197
2198 /* Function vectorizable_call.
2199
2200 Check if GS performs a function call that can be vectorized.
2201 If VEC_STMT is also passed, vectorize GS: create a vectorized
2202 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2203 Return FALSE if GS is not a vectorizable stmt, TRUE otherwise. */
2204
2205 static bool
2206 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2207 slp_tree slp_node)
2208 {
2209 gcall *stmt;
2210 tree vec_dest;
2211 tree scalar_dest;
2212 tree op, type;
2213 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2214 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2215 tree vectype_out, vectype_in;
2216 int nunits_in;
2217 int nunits_out;
2218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2219 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2220 tree fndecl, new_temp, def, rhs_type;
2221 gimple def_stmt;
2222 enum vect_def_type dt[3]
2223 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2224 gimple new_stmt = NULL;
2225 int ncopies, j;
2226 vec<tree> vargs = vNULL;
2227 enum { NARROW, NONE, WIDEN } modifier;
2228 size_t i, nargs;
2229 tree lhs;
2230
2231 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2232 return false;
2233
2234 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2235 return false;
2236
2237 /* Is GS a vectorizable call? */
2238 stmt = dyn_cast <gcall *> (gs);
2239 if (!stmt)
2240 return false;
2241
2242 if (gimple_call_internal_p (stmt)
2243 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2244 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2245 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2246 slp_node);
2247
2248 if (gimple_call_lhs (stmt) == NULL_TREE
2249 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2250 return false;
2251
2252 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2253
2254 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2255
2256 /* Process function arguments. */
2257 rhs_type = NULL_TREE;
2258 vectype_in = NULL_TREE;
2259 nargs = gimple_call_num_args (stmt);
2260
2261 /* Bail out if the function has more than three arguments; we do not have
2262 interesting builtin functions to vectorize with more than two arguments
2263 except for fma. A call with no arguments is not vectorizable either. */
2264 if (nargs == 0 || nargs > 3)
2265 return false;
2266
2267 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2268 if (gimple_call_internal_p (stmt)
2269 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2270 {
2271 nargs = 0;
2272 rhs_type = unsigned_type_node;
2273 }
2274
2275 for (i = 0; i < nargs; i++)
2276 {
2277 tree opvectype;
2278
2279 op = gimple_call_arg (stmt, i);
2280
2281 /* We can only handle calls with arguments of the same type. */
2282 if (rhs_type
2283 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2284 {
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "argument types differ.\n");
2288 return false;
2289 }
2290 if (!rhs_type)
2291 rhs_type = TREE_TYPE (op);
2292
2293 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2294 &def_stmt, &def, &dt[i], &opvectype))
2295 {
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "use not simple.\n");
2299 return false;
2300 }
2301
2302 if (!vectype_in)
2303 vectype_in = opvectype;
2304 else if (opvectype
2305 && opvectype != vectype_in)
2306 {
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2309 "argument vector types differ.\n");
2310 return false;
2311 }
2312 }
2313 /* If all arguments are external or constant defs use a vector type with
2314 the same size as the output vector type. */
2315 if (!vectype_in)
2316 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2317 if (vec_stmt)
2318 gcc_assert (vectype_in);
2319 if (!vectype_in)
2320 {
2321 if (dump_enabled_p ())
2322 {
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "no vectype for scalar type ");
2325 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2326 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2327 }
2328
2329 return false;
2330 }
2331
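/* Only calls whose input and output vectors have the same number of
   elements, or differ by exactly a factor of two, are handled: NARROW when
   the output has twice as many elements as the input, WIDEN when it has
   half as many.  */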
2332 /* FORNOW */
2333 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2334 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2335 if (nunits_in == nunits_out / 2)
2336 modifier = NARROW;
2337 else if (nunits_out == nunits_in)
2338 modifier = NONE;
2339 else if (nunits_out == nunits_in / 2)
2340 modifier = WIDEN;
2341 else
2342 return false;
2343
2344 /* For now, we only vectorize functions if a target specific builtin
2345 is available. TODO -- in some cases, it might be profitable to
2346 insert the calls for pieces of the vector, in order to be able
2347 to vectorize other operations in the loop. */
2348 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2349 if (fndecl == NULL_TREE)
2350 {
2351 if (gimple_call_internal_p (stmt)
2352 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2353 && !slp_node
2354 && loop_vinfo
2355 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2356 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2357 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2358 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2359 {
2360 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2361 { 0, 1, 2, ... vf - 1 } vector. */
2362 gcc_assert (nargs == 0);
2363 }
2364 else
2365 {
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 "function is not vectorizable.\n");
2369 return false;
2370 }
2371 }
2372
2373 gcc_assert (!gimple_vuse (stmt));
2374
2375 if (slp_node || PURE_SLP_STMT (stmt_info))
2376 ncopies = 1;
2377 else if (modifier == NARROW)
2378 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2379 else
2380 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2381
2382 /* Sanity check: make sure that at least one copy of the vectorized stmt
2383 needs to be generated. */
2384 gcc_assert (ncopies >= 1);
2385
2386 if (!vec_stmt) /* transformation not required. */
2387 {
2388 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2391 "\n");
2392 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2393 return true;
2394 }
2395
2396 /** Transform. **/
2397
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2400
2401 /* Handle def. */
2402 scalar_dest = gimple_call_lhs (stmt);
2403 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2404
2405 prev_stmt_info = NULL;
2406 switch (modifier)
2407 {
2408 case NONE:
2409 for (j = 0; j < ncopies; ++j)
2410 {
2411 /* Build argument list for the vectorized call. */
2412 if (j == 0)
2413 vargs.create (nargs);
2414 else
2415 vargs.truncate (0);
2416
2417 if (slp_node)
2418 {
2419 auto_vec<vec<tree> > vec_defs (nargs);
2420 vec<tree> vec_oprnds0;
2421
2422 for (i = 0; i < nargs; i++)
2423 vargs.quick_push (gimple_call_arg (stmt, i));
2424 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2425 vec_oprnds0 = vec_defs[0];
2426
2427 /* Arguments are ready. Create the new vector stmt. */
2428 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2429 {
2430 size_t k;
2431 for (k = 0; k < nargs; k++)
2432 {
2433 vec<tree> vec_oprndsk = vec_defs[k];
2434 vargs[k] = vec_oprndsk[i];
2435 }
2436 new_stmt = gimple_build_call_vec (fndecl, vargs);
2437 new_temp = make_ssa_name (vec_dest, new_stmt);
2438 gimple_call_set_lhs (new_stmt, new_temp);
2439 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2440 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2441 }
2442
2443 for (i = 0; i < nargs; i++)
2444 {
2445 vec<tree> vec_oprndsi = vec_defs[i];
2446 vec_oprndsi.release ();
2447 }
2448 continue;
2449 }
2450
2451 for (i = 0; i < nargs; i++)
2452 {
2453 op = gimple_call_arg (stmt, i);
2454 if (j == 0)
2455 vec_oprnd0
2456 = vect_get_vec_def_for_operand (op, stmt, NULL);
2457 else
2458 {
2459 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2460 vec_oprnd0
2461 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2462 }
2463
2464 vargs.quick_push (vec_oprnd0);
2465 }
2466
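/* IFN_GOMP_SIMD_LANE does not become a real call; the J-th copy is simply
   the constant lane-number vector { j * nunits_out, ...,
   j * nunits_out + nunits_out - 1 }.  */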
2467 if (gimple_call_internal_p (stmt)
2468 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2469 {
2470 tree *v = XALLOCAVEC (tree, nunits_out);
2471 int k;
2472 for (k = 0; k < nunits_out; ++k)
2473 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2474 tree cst = build_vector (vectype_out, v);
2475 tree new_var
2476 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2477 gimple init_stmt = gimple_build_assign (new_var, cst);
2478 new_temp = make_ssa_name (new_var, init_stmt);
2479 gimple_assign_set_lhs (init_stmt, new_temp);
2480 vect_init_vector_1 (stmt, init_stmt, NULL);
2481 new_temp = make_ssa_name (vec_dest);
2482 new_stmt = gimple_build_assign (new_temp,
2483 gimple_assign_lhs (init_stmt));
2484 }
2485 else
2486 {
2487 new_stmt = gimple_build_call_vec (fndecl, vargs);
2488 new_temp = make_ssa_name (vec_dest, new_stmt);
2489 gimple_call_set_lhs (new_stmt, new_temp);
2490 }
2491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2492
2493 if (j == 0)
2494 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2495 else
2496 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2497
2498 prev_stmt_info = vinfo_for_stmt (new_stmt);
2499 }
2500
2501 break;
2502
2503 case NARROW:
2504 for (j = 0; j < ncopies; ++j)
2505 {
2506 /* Build argument list for the vectorized call. */
2507 if (j == 0)
2508 vargs.create (nargs * 2);
2509 else
2510 vargs.truncate (0);
2511
2512 if (slp_node)
2513 {
2514 auto_vec<vec<tree> > vec_defs (nargs);
2515 vec<tree> vec_oprnds0;
2516
2517 for (i = 0; i < nargs; i++)
2518 vargs.quick_push (gimple_call_arg (stmt, i));
2519 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2520 vec_oprnds0 = vec_defs[0];
2521
2522 /* Arguments are ready. Create the new vector stmt. */
2523 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2524 {
2525 size_t k;
2526 vargs.truncate (0);
2527 for (k = 0; k < nargs; k++)
2528 {
2529 vec<tree> vec_oprndsk = vec_defs[k];
2530 vargs.quick_push (vec_oprndsk[i]);
2531 vargs.quick_push (vec_oprndsk[i + 1]);
2532 }
2533 new_stmt = gimple_build_call_vec (fndecl, vargs);
2534 new_temp = make_ssa_name (vec_dest, new_stmt);
2535 gimple_call_set_lhs (new_stmt, new_temp);
2536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2537 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2538 }
2539
2540 for (i = 0; i < nargs; i++)
2541 {
2542 vec<tree> vec_oprndsi = vec_defs[i];
2543 vec_oprndsi.release ();
2544 }
2545 continue;
2546 }
2547
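/* In the NARROW case every vectorized call consumes two vector defs per
   scalar argument, so the defs are pushed onto VARGS in pairs.  */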
2548 for (i = 0; i < nargs; i++)
2549 {
2550 op = gimple_call_arg (stmt, i);
2551 if (j == 0)
2552 {
2553 vec_oprnd0
2554 = vect_get_vec_def_for_operand (op, stmt, NULL);
2555 vec_oprnd1
2556 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2557 }
2558 else
2559 {
2560 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2561 vec_oprnd0
2562 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2563 vec_oprnd1
2564 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2565 }
2566
2567 vargs.quick_push (vec_oprnd0);
2568 vargs.quick_push (vec_oprnd1);
2569 }
2570
2571 new_stmt = gimple_build_call_vec (fndecl, vargs);
2572 new_temp = make_ssa_name (vec_dest, new_stmt);
2573 gimple_call_set_lhs (new_stmt, new_temp);
2574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2575
2576 if (j == 0)
2577 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2578 else
2579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2580
2581 prev_stmt_info = vinfo_for_stmt (new_stmt);
2582 }
2583
2584 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2585
2586 break;
2587
2588 case WIDEN:
2589 /* No current target implements this case. */
2590 return false;
2591 }
2592
2593 vargs.release ();
2594
2595 /* The call in STMT might prevent it from being removed in dce.
2596 We however cannot remove it here because the SSA name it
2597 defines is mapped to the new definition. So just replace the
2598 rhs of the statement with something harmless. */
2599
2600 if (slp_node)
2601 return true;
2602
2603 type = TREE_TYPE (scalar_dest);
2604 if (is_pattern_stmt_p (stmt_info))
2605 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2606 else
2607 lhs = gimple_call_lhs (stmt);
2608 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2609 set_vinfo_for_stmt (new_stmt, stmt_info);
2610 set_vinfo_for_stmt (stmt, NULL);
2611 STMT_VINFO_STMT (stmt_info) = new_stmt;
2612 gsi_replace (gsi, new_stmt, false);
2613
2614 return true;
2615 }
2616
2617
2618 struct simd_call_arg_info
2619 {
2620 tree vectype;
2621 tree op;
2622 enum vect_def_type dt;
2623 HOST_WIDE_INT linear_step;
2624 unsigned int align;
2625 };
2626
2627 /* Function vectorizable_simd_clone_call.
2628
2629 Check if STMT performs a function call that can be vectorized
2630 by calling a simd clone of the function.
2631 If VEC_STMT is also passed, vectorize STMT: create a vectorized
2632 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2633 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2634
2635 static bool
2636 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2637 gimple *vec_stmt, slp_tree slp_node)
2638 {
2639 tree vec_dest;
2640 tree scalar_dest;
2641 tree op, type;
2642 tree vec_oprnd0 = NULL_TREE;
2643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2644 tree vectype;
2645 unsigned int nunits;
2646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2647 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2648 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2649 tree fndecl, new_temp, def;
2650 gimple def_stmt;
2651 gimple new_stmt = NULL;
2652 int ncopies, j;
2653 vec<simd_call_arg_info> arginfo = vNULL;
2654 vec<tree> vargs = vNULL;
2655 size_t i, nargs;
2656 tree lhs, rtype, ratype;
2657 vec<constructor_elt, va_gc> *ret_ctor_elts;
2658
2659 /* Is STMT a vectorizable call? */
2660 if (!is_gimple_call (stmt))
2661 return false;
2662
2663 fndecl = gimple_call_fndecl (stmt);
2664 if (fndecl == NULL_TREE)
2665 return false;
2666
2667 struct cgraph_node *node = cgraph_node::get (fndecl);
2668 if (node == NULL || node->simd_clones == NULL)
2669 return false;
2670
2671 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2672 return false;
2673
2674 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2675 return false;
2676
2677 if (gimple_call_lhs (stmt)
2678 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2679 return false;
2680
2681 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2682
2683 vectype = STMT_VINFO_VECTYPE (stmt_info);
2684
2685 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2686 return false;
2687
2688 /* FORNOW */
2689 if (slp_node || PURE_SLP_STMT (stmt_info))
2690 return false;
2691
2692 /* Process function arguments. */
2693 nargs = gimple_call_num_args (stmt);
2694
2695 /* Bail out if the function has zero arguments. */
2696 if (nargs == 0)
2697 return false;
2698
2699 arginfo.create (nargs);
2700
2701 for (i = 0; i < nargs; i++)
2702 {
2703 simd_call_arg_info thisarginfo;
2704 affine_iv iv;
2705
2706 thisarginfo.linear_step = 0;
2707 thisarginfo.align = 0;
2708 thisarginfo.op = NULL_TREE;
2709
2710 op = gimple_call_arg (stmt, i);
2711 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2712 &def_stmt, &def, &thisarginfo.dt,
2713 &thisarginfo.vectype)
2714 || thisarginfo.dt == vect_uninitialized_def)
2715 {
2716 if (dump_enabled_p ())
2717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2718 "use not simple.\n");
2719 arginfo.release ();
2720 return false;
2721 }
2722
2723 if (thisarginfo.dt == vect_constant_def
2724 || thisarginfo.dt == vect_external_def)
2725 gcc_assert (thisarginfo.vectype == NULL_TREE);
2726 else
2727 gcc_assert (thisarginfo.vectype != NULL_TREE);
2728
2729 /* For linear arguments, the analyze phase should have saved
2730 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2731 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2732 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2733 {
2734 gcc_assert (vec_stmt);
2735 thisarginfo.linear_step
2736 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2737 thisarginfo.op
2738 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2739 /* If the loop has been peeled for alignment, we need to adjust the linear argument's base accordingly. */
2740 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2741 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2742 if (n1 != n2)
2743 {
2744 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2745 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2746 tree opt = TREE_TYPE (thisarginfo.op);
2747 bias = fold_convert (TREE_TYPE (step), bias);
2748 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2749 thisarginfo.op
2750 = fold_build2 (POINTER_TYPE_P (opt)
2751 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2752 thisarginfo.op, bias);
2753 }
2754 }
2755 else if (!vec_stmt
2756 && thisarginfo.dt != vect_constant_def
2757 && thisarginfo.dt != vect_external_def
2758 && loop_vinfo
2759 && TREE_CODE (op) == SSA_NAME
2760 && simple_iv (loop, loop_containing_stmt (stmt), op,
2761 &iv, false)
2762 && tree_fits_shwi_p (iv.step))
2763 {
2764 thisarginfo.linear_step = tree_to_shwi (iv.step);
2765 thisarginfo.op = iv.base;
2766 }
2767 else if ((thisarginfo.dt == vect_constant_def
2768 || thisarginfo.dt == vect_external_def)
2769 && POINTER_TYPE_P (TREE_TYPE (op)))
2770 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2771
2772 arginfo.quick_push (thisarginfo);
2773 }
2774
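/* Choose the most suitable simd clone: clones whose simdlen exceeds the
   vectorization factor or whose argument kinds, linear steps or alignments
   do not match are rejected (inbranch clones are rejected for now as well),
   and among the remaining clones the one with the lowest badness score
   wins, preferring a simdlen equal to the vectorization factor.  */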
2775 unsigned int badness = 0;
2776 struct cgraph_node *bestn = NULL;
2777 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2778 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2779 else
2780 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2781 n = n->simdclone->next_clone)
2782 {
2783 unsigned int this_badness = 0;
2784 if (n->simdclone->simdlen
2785 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2786 || n->simdclone->nargs != nargs)
2787 continue;
2788 if (n->simdclone->simdlen
2789 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2791 - exact_log2 (n->simdclone->simdlen)) * 1024;
2792 if (n->simdclone->inbranch)
2793 this_badness += 2048;
2794 int target_badness = targetm.simd_clone.usable (n);
2795 if (target_badness < 0)
2796 continue;
2797 this_badness += target_badness * 512;
2798 /* FORNOW: Have to add code to add the mask argument. */
2799 if (n->simdclone->inbranch)
2800 continue;
2801 for (i = 0; i < nargs; i++)
2802 {
2803 switch (n->simdclone->args[i].arg_type)
2804 {
2805 case SIMD_CLONE_ARG_TYPE_VECTOR:
2806 if (!useless_type_conversion_p
2807 (n->simdclone->args[i].orig_type,
2808 TREE_TYPE (gimple_call_arg (stmt, i))))
2809 i = -1;
2810 else if (arginfo[i].dt == vect_constant_def
2811 || arginfo[i].dt == vect_external_def
2812 || arginfo[i].linear_step)
2813 this_badness += 64;
2814 break;
2815 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2816 if (arginfo[i].dt != vect_constant_def
2817 && arginfo[i].dt != vect_external_def)
2818 i = -1;
2819 break;
2820 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2821 if (arginfo[i].dt == vect_constant_def
2822 || arginfo[i].dt == vect_external_def
2823 || (arginfo[i].linear_step
2824 != n->simdclone->args[i].linear_step))
2825 i = -1;
2826 break;
2827 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2828 /* FORNOW */
2829 i = -1;
2830 break;
2831 case SIMD_CLONE_ARG_TYPE_MASK:
2832 gcc_unreachable ();
2833 }
2834 if (i == (size_t) -1)
2835 break;
2836 if (n->simdclone->args[i].alignment > arginfo[i].align)
2837 {
2838 i = -1;
2839 break;
2840 }
2841 if (arginfo[i].align)
2842 this_badness += (exact_log2 (arginfo[i].align)
2843 - exact_log2 (n->simdclone->args[i].alignment));
2844 }
2845 if (i == (size_t) -1)
2846 continue;
2847 if (bestn == NULL || this_badness < badness)
2848 {
2849 bestn = n;
2850 badness = this_badness;
2851 }
2852 }
2853
2854 if (bestn == NULL)
2855 {
2856 arginfo.release ();
2857 return false;
2858 }
2859
2860 for (i = 0; i < nargs; i++)
2861 if ((arginfo[i].dt == vect_constant_def
2862 || arginfo[i].dt == vect_external_def)
2863 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2864 {
2865 arginfo[i].vectype
2866 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2867 i)));
2868 if (arginfo[i].vectype == NULL
2869 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2870 > bestn->simdclone->simdlen))
2871 {
2872 arginfo.release ();
2873 return false;
2874 }
2875 }
2876
2877 fndecl = bestn->decl;
2878 nunits = bestn->simdclone->simdlen;
2879 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2880
2881 /* If the function isn't const, only allow it in simd loops where the user
2882 has asserted that at least nunits consecutive iterations can be
2883 performed using SIMD instructions. */
2884 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2885 && gimple_vuse (stmt))
2886 {
2887 arginfo.release ();
2888 return false;
2889 }
2890
2891 /* Sanity check: make sure that at least one copy of the vectorized stmt
2892 needs to be generated. */
2893 gcc_assert (ncopies >= 1);
2894
2895 if (!vec_stmt) /* transformation not required. */
2896 {
2897 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2898 for (i = 0; i < nargs; i++)
2899 if (bestn->simdclone->args[i].arg_type
2900 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2901 {
2902 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2903 + 1);
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2905 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2906 ? size_type_node : TREE_TYPE (arginfo[i].op);
2907 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2908 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2909 }
2910 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2911 if (dump_enabled_p ())
2912 dump_printf_loc (MSG_NOTE, vect_location,
2913 "=== vectorizable_simd_clone_call ===\n");
2914 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2915 arginfo.release ();
2916 return true;
2917 }
2918
2919 /** Transform. **/
2920
2921 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2923
2924 /* Handle def. */
2925 scalar_dest = gimple_call_lhs (stmt);
2926 vec_dest = NULL_TREE;
2927 rtype = NULL_TREE;
2928 ratype = NULL_TREE;
2929 if (scalar_dest)
2930 {
2931 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2932 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2933 if (TREE_CODE (rtype) == ARRAY_TYPE)
2934 {
2935 ratype = rtype;
2936 rtype = TREE_TYPE (ratype);
2937 }
2938 }
2939
2940 prev_stmt_info = NULL;
2941 for (j = 0; j < ncopies; ++j)
2942 {
2943 /* Build argument list for the vectorized call. */
2944 if (j == 0)
2945 vargs.create (nargs);
2946 else
2947 vargs.truncate (0);
2948
2949 for (i = 0; i < nargs; i++)
2950 {
2951 unsigned int k, l, m, o;
2952 tree atype;
2953 op = gimple_call_arg (stmt, i);
2954 switch (bestn->simdclone->args[i].arg_type)
2955 {
2956 case SIMD_CLONE_ARG_TYPE_VECTOR:
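/* Vector arguments may need re-chunking to the clone's vector argument
   type: extract pieces with BIT_FIELD_REF when the clone's type has fewer
   elements than our vector def, or glue several defs together with a
   CONSTRUCTOR when it has more.  */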
2957 atype = bestn->simdclone->args[i].vector_type;
2958 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2959 for (m = j * o; m < (j + 1) * o; m++)
2960 {
2961 if (TYPE_VECTOR_SUBPARTS (atype)
2962 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2963 {
2964 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2965 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2966 / TYPE_VECTOR_SUBPARTS (atype));
2967 gcc_assert ((k & (k - 1)) == 0);
2968 if (m == 0)
2969 vec_oprnd0
2970 = vect_get_vec_def_for_operand (op, stmt, NULL);
2971 else
2972 {
2973 vec_oprnd0 = arginfo[i].op;
2974 if ((m & (k - 1)) == 0)
2975 vec_oprnd0
2976 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2977 vec_oprnd0);
2978 }
2979 arginfo[i].op = vec_oprnd0;
2980 vec_oprnd0
2981 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2982 size_int (prec),
2983 bitsize_int ((m & (k - 1)) * prec));
2984 new_stmt
2985 = gimple_build_assign (make_ssa_name (atype),
2986 vec_oprnd0);
2987 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2988 vargs.safe_push (gimple_assign_lhs (new_stmt));
2989 }
2990 else
2991 {
2992 k = (TYPE_VECTOR_SUBPARTS (atype)
2993 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2994 gcc_assert ((k & (k - 1)) == 0);
2995 vec<constructor_elt, va_gc> *ctor_elts;
2996 if (k != 1)
2997 vec_alloc (ctor_elts, k);
2998 else
2999 ctor_elts = NULL;
3000 for (l = 0; l < k; l++)
3001 {
3002 if (m == 0 && l == 0)
3003 vec_oprnd0
3004 = vect_get_vec_def_for_operand (op, stmt, NULL);
3005 else
3006 vec_oprnd0
3007 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3008 arginfo[i].op);
3009 arginfo[i].op = vec_oprnd0;
3010 if (k == 1)
3011 break;
3012 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3013 vec_oprnd0);
3014 }
3015 if (k == 1)
3016 vargs.safe_push (vec_oprnd0);
3017 else
3018 {
3019 vec_oprnd0 = build_constructor (atype, ctor_elts);
3020 new_stmt
3021 = gimple_build_assign (make_ssa_name (atype),
3022 vec_oprnd0);
3023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3024 vargs.safe_push (gimple_assign_lhs (new_stmt));
3025 }
3026 }
3027 }
3028 break;
3029 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3030 vargs.safe_push (op);
3031 break;
3032 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
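/* A linear argument is materialized as an induction variable: the first
   copy creates a PHI in the loop header stepping by
   linear_step * ncopies * nunits per vector iteration, and copy J adds
   j * nunits * linear_step to that PHI's result.  */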
3033 if (j == 0)
3034 {
3035 gimple_seq stmts;
3036 arginfo[i].op
3037 = force_gimple_operand (arginfo[i].op, &stmts, true,
3038 NULL_TREE);
3039 if (stmts != NULL)
3040 {
3041 basic_block new_bb;
3042 edge pe = loop_preheader_edge (loop);
3043 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3044 gcc_assert (!new_bb);
3045 }
3046 tree phi_res = copy_ssa_name (op);
3047 gphi *new_phi = create_phi_node (phi_res, loop->header);
3048 set_vinfo_for_stmt (new_phi,
3049 new_stmt_vec_info (new_phi, loop_vinfo,
3050 NULL));
3051 add_phi_arg (new_phi, arginfo[i].op,
3052 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3053 enum tree_code code
3054 = POINTER_TYPE_P (TREE_TYPE (op))
3055 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3056 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3057 ? sizetype : TREE_TYPE (op);
3058 widest_int cst
3059 = wi::mul (bestn->simdclone->args[i].linear_step,
3060 ncopies * nunits);
3061 tree tcst = wide_int_to_tree (type, cst);
3062 tree phi_arg = copy_ssa_name (op);
3063 new_stmt
3064 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3065 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3066 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3067 set_vinfo_for_stmt (new_stmt,
3068 new_stmt_vec_info (new_stmt, loop_vinfo,
3069 NULL));
3070 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3071 UNKNOWN_LOCATION);
3072 arginfo[i].op = phi_res;
3073 vargs.safe_push (phi_res);
3074 }
3075 else
3076 {
3077 enum tree_code code
3078 = POINTER_TYPE_P (TREE_TYPE (op))
3079 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3080 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3081 ? sizetype : TREE_TYPE (op);
3082 widest_int cst
3083 = wi::mul (bestn->simdclone->args[i].linear_step,
3084 j * nunits);
3085 tree tcst = wide_int_to_tree (type, cst);
3086 new_temp = make_ssa_name (TREE_TYPE (op));
3087 new_stmt = gimple_build_assign (new_temp, code,
3088 arginfo[i].op, tcst);
3089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3090 vargs.safe_push (new_temp);
3091 }
3092 break;
3093 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3094 default:
3095 gcc_unreachable ();
3096 }
3097 }
3098
3099 new_stmt = gimple_build_call_vec (fndecl, vargs);
3100 if (vec_dest)
3101 {
3102 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3103 if (ratype)
3104 new_temp = create_tmp_var (ratype);
3105 else if (TYPE_VECTOR_SUBPARTS (vectype)
3106 == TYPE_VECTOR_SUBPARTS (rtype))
3107 new_temp = make_ssa_name (vec_dest, new_stmt);
3108 else
3109 new_temp = make_ssa_name (rtype, new_stmt);
3110 gimple_call_set_lhs (new_stmt, new_temp);
3111 }
3112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3113
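/* Massage the clone's return value back into VECTYPE-sized vectors: split
   the result when the clone returns more elements than VECTYPE holds,
   accumulate several calls into a CONSTRUCTOR when it returns fewer, and
   read array returns (RATYPE) back through MEM_REF/ARRAY_REF before
   clobbering the temporary.  */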
3114 if (vec_dest)
3115 {
3116 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3117 {
3118 unsigned int k, l;
3119 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3120 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3121 gcc_assert ((k & (k - 1)) == 0);
3122 for (l = 0; l < k; l++)
3123 {
3124 tree t;
3125 if (ratype)
3126 {
3127 t = build_fold_addr_expr (new_temp);
3128 t = build2 (MEM_REF, vectype, t,
3129 build_int_cst (TREE_TYPE (t),
3130 l * prec / BITS_PER_UNIT));
3131 }
3132 else
3133 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3134 size_int (prec), bitsize_int (l * prec));
3135 new_stmt
3136 = gimple_build_assign (make_ssa_name (vectype), t);
3137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3138 if (j == 0 && l == 0)
3139 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3140 else
3141 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3142
3143 prev_stmt_info = vinfo_for_stmt (new_stmt);
3144 }
3145
3146 if (ratype)
3147 {
3148 tree clobber = build_constructor (ratype, NULL);
3149 TREE_THIS_VOLATILE (clobber) = 1;
3150 new_stmt = gimple_build_assign (new_temp, clobber);
3151 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3152 }
3153 continue;
3154 }
3155 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3156 {
3157 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3158 / TYPE_VECTOR_SUBPARTS (rtype));
3159 gcc_assert ((k & (k - 1)) == 0);
3160 if ((j & (k - 1)) == 0)
3161 vec_alloc (ret_ctor_elts, k);
3162 if (ratype)
3163 {
3164 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3165 for (m = 0; m < o; m++)
3166 {
3167 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3168 size_int (m), NULL_TREE, NULL_TREE);
3169 new_stmt
3170 = gimple_build_assign (make_ssa_name (rtype), tem);
3171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3172 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3173 gimple_assign_lhs (new_stmt));
3174 }
3175 tree clobber = build_constructor (ratype, NULL);
3176 TREE_THIS_VOLATILE (clobber) = 1;
3177 new_stmt = gimple_build_assign (new_temp, clobber);
3178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3179 }
3180 else
3181 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3182 if ((j & (k - 1)) != k - 1)
3183 continue;
3184 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3185 new_stmt
3186 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3188
3189 if ((unsigned) j == k - 1)
3190 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3191 else
3192 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3193
3194 prev_stmt_info = vinfo_for_stmt (new_stmt);
3195 continue;
3196 }
3197 else if (ratype)
3198 {
3199 tree t = build_fold_addr_expr (new_temp);
3200 t = build2 (MEM_REF, vectype, t,
3201 build_int_cst (TREE_TYPE (t), 0));
3202 new_stmt
3203 = gimple_build_assign (make_ssa_name (vec_dest), t);
3204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3205 tree clobber = build_constructor (ratype, NULL);
3206 TREE_THIS_VOLATILE (clobber) = 1;
3207 vect_finish_stmt_generation (stmt,
3208 gimple_build_assign (new_temp,
3209 clobber), gsi);
3210 }
3211 }
3212
3213 if (j == 0)
3214 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3215 else
3216 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3217
3218 prev_stmt_info = vinfo_for_stmt (new_stmt);
3219 }
3220
3221 vargs.release ();
3222
3223 /* The call in STMT might prevent it from being removed in dce.
3224 We however cannot remove it here because the SSA name it
3225 defines is mapped to the new definition. So just replace the
3226 rhs of the statement with something harmless. */
3227
3228 if (slp_node)
3229 return true;
3230
3231 if (scalar_dest)
3232 {
3233 type = TREE_TYPE (scalar_dest);
3234 if (is_pattern_stmt_p (stmt_info))
3235 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3236 else
3237 lhs = gimple_call_lhs (stmt);
3238 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3239 }
3240 else
3241 new_stmt = gimple_build_nop ();
3242 set_vinfo_for_stmt (new_stmt, stmt_info);
3243 set_vinfo_for_stmt (stmt, NULL);
3244 STMT_VINFO_STMT (stmt_info) = new_stmt;
3245 gsi_replace (gsi, new_stmt, true);
3246 unlink_stmt_vdef (stmt);
3247
3248 return true;
3249 }
3250
3251
3252 /* Function vect_gen_widened_results_half
3253
3254 Create a vector stmt whose code, number of arguments, and result
3255 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3256 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3257 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3258 needs to be created (DECL is a function-decl of a target-builtin).
3259 STMT is the original scalar stmt that we are vectorizing. */
3260
3261 static gimple
3262 vect_gen_widened_results_half (enum tree_code code,
3263 tree decl,
3264 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3265 tree vec_dest, gimple_stmt_iterator *gsi,
3266 gimple stmt)
3267 {
3268 gimple new_stmt;
3269 tree new_temp;
3270
3271 /* Generate half of the widened result: */
3272 if (code == CALL_EXPR)
3273 {
3274 /* Target specific support */
3275 if (op_type == binary_op)
3276 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3277 else
3278 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3279 new_temp = make_ssa_name (vec_dest, new_stmt);
3280 gimple_call_set_lhs (new_stmt, new_temp);
3281 }
3282 else
3283 {
3284 /* Generic support */
3285 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3286 if (op_type != binary_op)
3287 vec_oprnd1 = NULL;
3288 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3289 new_temp = make_ssa_name (vec_dest, new_stmt);
3290 gimple_assign_set_lhs (new_stmt, new_temp);
3291 }
3292 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3293
3294 return new_stmt;
3295 }
3296
3297
3298 /* Get vectorized definitions for loop-based vectorization. For the first
3299 operand we call vect_get_vec_def_for_operand() (with OPRND containing the
3300 scalar operand), and for the rest we get a copy with
3301 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3302 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3303 The vectors are collected into VEC_OPRNDS. */
3304
3305 static void
3306 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3307 vec<tree> *vec_oprnds, int multi_step_cvt)
3308 {
3309 tree vec_oprnd;
3310
3311 /* Get the first vector operand. */
3312 /* All the vector operands except the very first one (that is the scalar
3313 operand) are stmt copies. */
3314 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3315 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3316 else
3317 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3318
3319 vec_oprnds->quick_push (vec_oprnd);
3320
3321 /* Get second vector operand. */
3322 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3323 vec_oprnds->quick_push (vec_oprnd);
3324
3325 *oprnd = vec_oprnd;
3326
3327 /* For conversion in multiple steps, continue to get operands
3328 recursively. */
3329 if (multi_step_cvt)
3330 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3331 }
3332
3333
3334 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3335 For multi-step conversions store the resulting vectors and call the function
3336 recursively. */
3337
3338 static void
3339 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3340 int multi_step_cvt, gimple stmt,
3341 vec<tree> vec_dsts,
3342 gimple_stmt_iterator *gsi,
3343 slp_tree slp_node, enum tree_code code,
3344 stmt_vec_info *prev_stmt_info)
3345 {
3346 unsigned int i;
3347 tree vop0, vop1, new_tmp, vec_dest;
3348 gimple new_stmt;
3349 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3350
3351 vec_dest = vec_dsts.pop ();
3352
3353 for (i = 0; i < vec_oprnds->length (); i += 2)
3354 {
3355 /* Create demotion operation. */
3356 vop0 = (*vec_oprnds)[i];
3357 vop1 = (*vec_oprnds)[i + 1];
3358 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3359 new_tmp = make_ssa_name (vec_dest, new_stmt);
3360 gimple_assign_set_lhs (new_stmt, new_tmp);
3361 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3362
3363 if (multi_step_cvt)
3364 /* Store the resulting vector for next recursive call. */
3365 (*vec_oprnds)[i/2] = new_tmp;
3366 else
3367 {
3368 /* This is the last step of the conversion sequence. Store the
3369 vectors in SLP_NODE or in vector info of the scalar statement
3370 (or in STMT_VINFO_RELATED_STMT chain). */
3371 if (slp_node)
3372 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3373 else
3374 {
3375 if (!*prev_stmt_info)
3376 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3377 else
3378 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3379
3380 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3381 }
3382 }
3383 }
3384
3385 /* For multi-step demotion operations we first generate demotion operations
3386 from the source type to the intermediate types, and then combine the
3387 results (stored in VEC_OPRNDS) with a demotion operation to the
3388 destination type. */
3389 if (multi_step_cvt)
3390 {
3391 /* At each level of recursion we have half of the operands we had at the
3392 previous level. */
3393 vec_oprnds->truncate ((i+1)/2);
3394 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3395 stmt, vec_dsts, gsi, slp_node,
3396 VEC_PACK_TRUNC_EXPR,
3397 prev_stmt_info);
3398 }
3399
3400 vec_dsts.quick_push (vec_dest);
3401 }
3402
3403
3404 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3405 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3406 the resulting vectors and call the function recursively. */
3407
3408 static void
3409 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3410 vec<tree> *vec_oprnds1,
3411 gimple stmt, tree vec_dest,
3412 gimple_stmt_iterator *gsi,
3413 enum tree_code code1,
3414 enum tree_code code2, tree decl1,
3415 tree decl2, int op_type)
3416 {
3417 int i;
3418 tree vop0, vop1, new_tmp1, new_tmp2;
3419 gimple new_stmt1, new_stmt2;
3420 vec<tree> vec_tmp = vNULL;
3421
3422 vec_tmp.create (vec_oprnds0->length () * 2);
3423 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3424 {
3425 if (op_type == binary_op)
3426 vop1 = (*vec_oprnds1)[i];
3427 else
3428 vop1 = NULL_TREE;
3429
3430 /* Generate the two halves of promotion operation. */
3431 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3432 op_type, vec_dest, gsi, stmt);
3433 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3434 op_type, vec_dest, gsi, stmt);
3435 if (is_gimple_call (new_stmt1))
3436 {
3437 new_tmp1 = gimple_call_lhs (new_stmt1);
3438 new_tmp2 = gimple_call_lhs (new_stmt2);
3439 }
3440 else
3441 {
3442 new_tmp1 = gimple_assign_lhs (new_stmt1);
3443 new_tmp2 = gimple_assign_lhs (new_stmt2);
3444 }
3445
3446 /* Store the results for the next step. */
3447 vec_tmp.quick_push (new_tmp1);
3448 vec_tmp.quick_push (new_tmp2);
3449 }
3450
3451 vec_oprnds0->release ();
3452 *vec_oprnds0 = vec_tmp;
3453 }
3454
3455
3456 /* Check if STMT performs a conversion operation that can be vectorized.
3457 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3458 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3459 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3460
3461 static bool
3462 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3463 gimple *vec_stmt, slp_tree slp_node)
3464 {
3465 tree vec_dest;
3466 tree scalar_dest;
3467 tree op0, op1 = NULL_TREE;
3468 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3470 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3471 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3472 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3473 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3474 tree new_temp;
3475 tree def;
3476 gimple def_stmt;
3477 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3478 gimple new_stmt = NULL;
3479 stmt_vec_info prev_stmt_info;
3480 int nunits_in;
3481 int nunits_out;
3482 tree vectype_out, vectype_in;
3483 int ncopies, i, j;
3484 tree lhs_type, rhs_type;
3485 enum { NARROW, NONE, WIDEN } modifier;
3486 vec<tree> vec_oprnds0 = vNULL;
3487 vec<tree> vec_oprnds1 = vNULL;
3488 tree vop0;
3489 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3490 int multi_step_cvt = 0;
3491 vec<tree> vec_dsts = vNULL;
3492 vec<tree> interm_types = vNULL;
3493 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3494 int op_type;
3495 machine_mode rhs_mode;
3496 unsigned short fltsz;
3497
3498 /* Is STMT a vectorizable conversion? */
3499
3500 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3501 return false;
3502
3503 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3504 return false;
3505
3506 if (!is_gimple_assign (stmt))
3507 return false;
3508
3509 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3510 return false;
3511
3512 code = gimple_assign_rhs_code (stmt);
3513 if (!CONVERT_EXPR_CODE_P (code)
3514 && code != FIX_TRUNC_EXPR
3515 && code != FLOAT_EXPR
3516 && code != WIDEN_MULT_EXPR
3517 && code != WIDEN_LSHIFT_EXPR)
3518 return false;
3519
3520 op_type = TREE_CODE_LENGTH (code);
3521
3522 /* Check types of lhs and rhs. */
3523 scalar_dest = gimple_assign_lhs (stmt);
3524 lhs_type = TREE_TYPE (scalar_dest);
3525 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3526
3527 op0 = gimple_assign_rhs1 (stmt);
3528 rhs_type = TREE_TYPE (op0);
3529
3530 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3531 && !((INTEGRAL_TYPE_P (lhs_type)
3532 && INTEGRAL_TYPE_P (rhs_type))
3533 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3534 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3535 return false;
3536
3537 if ((INTEGRAL_TYPE_P (lhs_type)
3538 && (TYPE_PRECISION (lhs_type)
3539 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3540 || (INTEGRAL_TYPE_P (rhs_type)
3541 && (TYPE_PRECISION (rhs_type)
3542 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3543 {
3544 if (dump_enabled_p ())
3545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3546 "type conversion to/from bit-precision unsupported."
3547 "\n");
3548 return false;
3549 }
3550
3551 /* Check the operands of the operation. */
3552 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3553 &def_stmt, &def, &dt[0], &vectype_in))
3554 {
3555 if (dump_enabled_p ())
3556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3557 "use not simple.\n");
3558 return false;
3559 }
3560 if (op_type == binary_op)
3561 {
3562 bool ok;
3563
3564 op1 = gimple_assign_rhs2 (stmt);
3565 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3566 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3567 OP1. */
3568 if (CONSTANT_CLASS_P (op0))
3569 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3570 &def_stmt, &def, &dt[1], &vectype_in);
3571 else
3572 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3573 &def, &dt[1]);
3574
3575 if (!ok)
3576 {
3577 if (dump_enabled_p ())
3578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3579 "use not simple.\n");
3580 return false;
3581 }
3582 }
3583
3584 /* If op0 is an external or constant def, use a vector type of
3585 the same size as the output vector type. */
3586 if (!vectype_in)
3587 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3588 if (vec_stmt)
3589 gcc_assert (vectype_in);
3590 if (!vectype_in)
3591 {
3592 if (dump_enabled_p ())
3593 {
3594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3595 "no vectype for scalar type ");
3596 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3597 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3598 }
3599
3600 return false;
3601 }
3602
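   /* Classify the conversion from the relative element counts of the input
      and output vector types: more elements on the output side means the
      scalar elements are being narrowed, fewer means they are widened.  */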
3603 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3604 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3605 if (nunits_in < nunits_out)
3606 modifier = NARROW;
3607 else if (nunits_out == nunits_in)
3608 modifier = NONE;
3609 else
3610 modifier = WIDEN;
3611
3612 /* Multiple types in SLP are handled by creating the appropriate number of
3613 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3614 case of SLP. */
3615 if (slp_node || PURE_SLP_STMT (stmt_info))
3616 ncopies = 1;
3617 else if (modifier == NARROW)
3618 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3619 else
3620 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3621
3622 /* Sanity check: make sure that at least one copy of the vectorized stmt
3623 needs to be generated. */
3624 gcc_assert (ncopies >= 1);
3625
3626 /* Supportable by target? */
3627 switch (modifier)
3628 {
3629 case NONE:
3630 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3631 return false;
3632 if (supportable_convert_operation (code, vectype_out, vectype_in,
3633 &decl1, &code1))
3634 break;
3635 /* FALLTHRU */
3636 unsupported:
3637 if (dump_enabled_p ())
3638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3639 "conversion not supported by target.\n");
3640 return false;
3641
3642 case WIDEN:
3643 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3644 &code1, &code2, &multi_step_cvt,
3645 &interm_types))
3646 {
3647 /* A binary widening operation can only be supported directly by the
3648 architecture. */
3649 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3650 break;
3651 }
3652
3653 if (code != FLOAT_EXPR
3654 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3655 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3656 goto unsupported;
3657
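   /* No single-step widening is available.  For an integer-to-float
      conversion, look for a two-step sequence instead: widen the integer
      input to a wider integer mode and then convert that to the float
      type, trying successively wider integer modes up to the float size.  */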
3658 rhs_mode = TYPE_MODE (rhs_type);
3659 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3660 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3661 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3662 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3663 {
3664 cvt_type
3665 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3666 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3667 if (cvt_type == NULL_TREE)
3668 goto unsupported;
3669
3670 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3671 {
3672 if (!supportable_convert_operation (code, vectype_out,
3673 cvt_type, &decl1, &codecvt1))
3674 goto unsupported;
3675 }
3676 else if (!supportable_widening_operation (code, stmt, vectype_out,
3677 cvt_type, &codecvt1,
3678 &codecvt2, &multi_step_cvt,
3679 &interm_types))
3680 continue;
3681 else
3682 gcc_assert (multi_step_cvt == 0);
3683
3684 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3685 vectype_in, &code1, &code2,
3686 &multi_step_cvt, &interm_types))
3687 break;
3688 }
3689
3690 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3691 goto unsupported;
3692
3693 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3694 codecvt2 = ERROR_MARK;
3695 else
3696 {
3697 multi_step_cvt++;
3698 interm_types.safe_push (cvt_type);
3699 cvt_type = NULL_TREE;
3700 }
3701 break;
3702
3703 case NARROW:
3704 gcc_assert (op_type == unary_op);
3705 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3706 &code1, &multi_step_cvt,
3707 &interm_types))
3708 break;
3709
3710 if (code != FIX_TRUNC_EXPR
3711 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3712 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3713 goto unsupported;
3714
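   /* No single-step narrowing is available.  For a float-to-integer
      truncation, try converting to an integer type of the same width as
      the float first and then narrowing that intermediate result down to
      the final integer type.  */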
3715 rhs_mode = TYPE_MODE (rhs_type);
3716 cvt_type
3717 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3718 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3719 if (cvt_type == NULL_TREE)
3720 goto unsupported;
3721 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3722 &decl1, &codecvt1))
3723 goto unsupported;
3724 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3725 &code1, &multi_step_cvt,
3726 &interm_types))
3727 break;
3728 goto unsupported;
3729
3730 default:
3731 gcc_unreachable ();
3732 }
3733
3734 if (!vec_stmt) /* transformation not required. */
3735 {
3736 if (dump_enabled_p ())
3737 dump_printf_loc (MSG_NOTE, vect_location,
3738 "=== vectorizable_conversion ===\n");
3739 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3740 {
3741 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3742 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3743 }
3744 else if (modifier == NARROW)
3745 {
3746 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3747 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3748 }
3749 else
3750 {
3751 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3752 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3753 }
3754 interm_types.release ();
3755 return true;
3756 }
3757
3758 /** Transform. **/
3759 if (dump_enabled_p ())
3760 dump_printf_loc (MSG_NOTE, vect_location,
3761 "transform conversion. ncopies = %d.\n", ncopies);
3762
3763 if (op_type == binary_op)
3764 {
3765 if (CONSTANT_CLASS_P (op0))
3766 op0 = fold_convert (TREE_TYPE (op1), op0);
3767 else if (CONSTANT_CLASS_P (op1))
3768 op1 = fold_convert (TREE_TYPE (op0), op1);
3769 }
3770
3771 /* In case of multi-step conversion, we first generate conversion operations
3772 to the intermediate types, and then from those types to the final one.
3773 We create vector destinations for the intermediate types (TYPES) received
3774 from supportable_*_operation, and store them in the correct order
3775 for future use in vect_create_vectorized_*_stmts (). */
3776 vec_dsts.create (multi_step_cvt + 1);
3777 vec_dest = vect_create_destination_var (scalar_dest,
3778 (cvt_type && modifier == WIDEN)
3779 ? cvt_type : vectype_out);
3780 vec_dsts.quick_push (vec_dest);
3781
3782 if (multi_step_cvt)
3783 {
3784 for (i = interm_types.length () - 1;
3785 interm_types.iterate (i, &intermediate_type); i--)
3786 {
3787 vec_dest = vect_create_destination_var (scalar_dest,
3788 intermediate_type);
3789 vec_dsts.quick_push (vec_dest);
3790 }
3791 }
3792
3793 if (cvt_type)
3794 vec_dest = vect_create_destination_var (scalar_dest,
3795 modifier == WIDEN
3796 ? vectype_out : cvt_type);
3797
3798 if (!slp_node)
3799 {
3800 if (modifier == WIDEN)
3801 {
3802 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3803 if (op_type == binary_op)
3804 vec_oprnds1.create (1);
3805 }
3806 else if (modifier == NARROW)
3807 vec_oprnds0.create (
3808 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3809 }
3810 else if (code == WIDEN_LSHIFT_EXPR)
3811 vec_oprnds1.create (slp_node->vec_stmts_size);
3812
3813 last_oprnd = op0;
3814 prev_stmt_info = NULL;
3815 switch (modifier)
3816 {
3817 case NONE:
3818 for (j = 0; j < ncopies; j++)
3819 {
3820 if (j == 0)
3821 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3822 -1);
3823 else
3824 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3825
3826 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3827 {
3828 /* Arguments are ready, create the new vector stmt. */
3829 if (code1 == CALL_EXPR)
3830 {
3831 new_stmt = gimple_build_call (decl1, 1, vop0);
3832 new_temp = make_ssa_name (vec_dest, new_stmt);
3833 gimple_call_set_lhs (new_stmt, new_temp);
3834 }
3835 else
3836 {
3837 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3838 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3839 new_temp = make_ssa_name (vec_dest, new_stmt);
3840 gimple_assign_set_lhs (new_stmt, new_temp);
3841 }
3842
3843 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3844 if (slp_node)
3845 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3846 }
3847
3848 if (j == 0)
3849 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3850 else
3851 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3852 prev_stmt_info = vinfo_for_stmt (new_stmt);
3853 }
3854 break;
3855
3856 case WIDEN:
3857 /* In case the vectorization factor (VF) is bigger than the number
3858 of elements that we can fit in a vectype (nunits), we have to
3859 generate more than one vector stmt - i.e - we need to "unroll"
3860 the vector stmt by a factor VF/nunits. */
3861 for (j = 0; j < ncopies; j++)
3862 {
3863 /* Handle uses. */
3864 if (j == 0)
3865 {
3866 if (slp_node)
3867 {
3868 if (code == WIDEN_LSHIFT_EXPR)
3869 {
3870 unsigned int k;
3871
3872 vec_oprnd1 = op1;
3873 /* Store vec_oprnd1 for every vector stmt to be created
3874 for SLP_NODE. We check during the analysis that all
3875 the shift arguments are the same. */
3876 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3877 vec_oprnds1.quick_push (vec_oprnd1);
3878
3879 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3880 slp_node, -1);
3881 }
3882 else
3883 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3884 &vec_oprnds1, slp_node, -1);
3885 }
3886 else
3887 {
3888 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3889 vec_oprnds0.quick_push (vec_oprnd0);
3890 if (op_type == binary_op)
3891 {
3892 if (code == WIDEN_LSHIFT_EXPR)
3893 vec_oprnd1 = op1;
3894 else
3895 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3896 NULL);
3897 vec_oprnds1.quick_push (vec_oprnd1);
3898 }
3899 }
3900 }
3901 else
3902 {
3903 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3904 vec_oprnds0.truncate (0);
3905 vec_oprnds0.quick_push (vec_oprnd0);
3906 if (op_type == binary_op)
3907 {
3908 if (code == WIDEN_LSHIFT_EXPR)
3909 vec_oprnd1 = op1;
3910 else
3911 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3912 vec_oprnd1);
3913 vec_oprnds1.truncate (0);
3914 vec_oprnds1.quick_push (vec_oprnd1);
3915 }
3916 }
3917
3918 /* Arguments are ready. Create the new vector stmts. */
3919 for (i = multi_step_cvt; i >= 0; i--)
3920 {
3921 tree this_dest = vec_dsts[i];
3922 enum tree_code c1 = code1, c2 = code2;
3923 if (i == 0 && codecvt2 != ERROR_MARK)
3924 {
3925 c1 = codecvt1;
3926 c2 = codecvt2;
3927 }
3928 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3929 &vec_oprnds1,
3930 stmt, this_dest, gsi,
3931 c1, c2, decl1, decl2,
3932 op_type);
3933 }
3934
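	  /* If a final conversion step is required (CVT_TYPE is set), apply
	     CODECVT1 to each promoted vector; otherwise the promotion results
	     themselves are already the final vector stmts.  */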
3935 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3936 {
3937 if (cvt_type)
3938 {
3939 if (codecvt1 == CALL_EXPR)
3940 {
3941 new_stmt = gimple_build_call (decl1, 1, vop0);
3942 new_temp = make_ssa_name (vec_dest, new_stmt);
3943 gimple_call_set_lhs (new_stmt, new_temp);
3944 }
3945 else
3946 {
3947 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3948 new_temp = make_ssa_name (vec_dest);
3949 new_stmt = gimple_build_assign (new_temp, codecvt1,
3950 vop0);
3951 }
3952
3953 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3954 }
3955 else
3956 new_stmt = SSA_NAME_DEF_STMT (vop0);
3957
3958 if (slp_node)
3959 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3960 else
3961 {
3962 if (!prev_stmt_info)
3963 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3964 else
3965 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3966 prev_stmt_info = vinfo_for_stmt (new_stmt);
3967 }
3968 }
3969 }
3970
3971 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3972 break;
3973
3974 case NARROW:
3975 /* In case the vectorization factor (VF) is bigger than the number
3976 of elements that we can fit in a vectype (nunits), we have to
3977 generate more than one vector stmt - i.e - we need to "unroll"
3978 the vector stmt by a factor VF/nunits. */
3979 for (j = 0; j < ncopies; j++)
3980 {
3981 /* Handle uses. */
3982 if (slp_node)
3983 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3984 slp_node, -1);
3985 else
3986 {
3987 vec_oprnds0.truncate (0);
3988 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3989 vect_pow2 (multi_step_cvt) - 1);
3990 }
3991
3992 /* Arguments are ready. Create the new vector stmts. */
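	  /* For a two-step narrowing (CVT_TYPE is set), first apply CODECVT1
	     to convert each input vector to the intermediate type, then narrow
	     the converted vectors via vect_create_vectorized_demotion_stmts.  */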
3993 if (cvt_type)
3994 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3995 {
3996 if (codecvt1 == CALL_EXPR)
3997 {
3998 new_stmt = gimple_build_call (decl1, 1, vop0);
3999 new_temp = make_ssa_name (vec_dest, new_stmt);
4000 gimple_call_set_lhs (new_stmt, new_temp);
4001 }
4002 else
4003 {
4004 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4005 new_temp = make_ssa_name (vec_dest);
4006 new_stmt = gimple_build_assign (new_temp, codecvt1,
4007 vop0);
4008 }
4009
4010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4011 vec_oprnds0[i] = new_temp;
4012 }
4013
4014 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4015 stmt, vec_dsts, gsi,
4016 slp_node, code1,
4017 &prev_stmt_info);
4018 }
4019
4020 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4021 break;
4022 }
4023
4024 vec_oprnds0.release ();
4025 vec_oprnds1.release ();
4026 vec_dsts.release ();
4027 interm_types.release ();
4028
4029 return true;
4030 }
4031
4032
4033 /* Function vectorizable_assignment.
4034
4035 Check if STMT performs an assignment (copy) that can be vectorized.
4036 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4037 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4038 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4039
4040 static bool
4041 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4042 gimple *vec_stmt, slp_tree slp_node)
4043 {
4044 tree vec_dest;
4045 tree scalar_dest;
4046 tree op;
4047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4048 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4049 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4050 tree new_temp;
4051 tree def;
4052 gimple def_stmt;
4053 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4054 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4055 int ncopies;
4056 int i, j;
4057 vec<tree> vec_oprnds = vNULL;
4058 tree vop;
4059 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4060 gimple new_stmt = NULL;
4061 stmt_vec_info prev_stmt_info = NULL;
4062 enum tree_code code;
4063 tree vectype_in;
4064
4065 /* Multiple types in SLP are handled by creating the appropriate number of
4066 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4067 case of SLP. */
4068 if (slp_node || PURE_SLP_STMT (stmt_info))
4069 ncopies = 1;
4070 else
4071 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4072
4073 gcc_assert (ncopies >= 1);
4074
4075 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4076 return false;
4077
4078 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4079 return false;
4080
4081 /* Is vectorizable assignment? */
4082 if (!is_gimple_assign (stmt))
4083 return false;
4084
4085 scalar_dest = gimple_assign_lhs (stmt);
4086 if (TREE_CODE (scalar_dest) != SSA_NAME)
4087 return false;
4088
4089 code = gimple_assign_rhs_code (stmt);
4090 if (gimple_assign_single_p (stmt)
4091 || code == PAREN_EXPR
4092 || CONVERT_EXPR_CODE_P (code))
4093 op = gimple_assign_rhs1 (stmt);
4094 else
4095 return false;
4096
4097 if (code == VIEW_CONVERT_EXPR)
4098 op = TREE_OPERAND (op, 0);
4099
4100 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4101 &def_stmt, &def, &dt[0], &vectype_in))
4102 {
4103 if (dump_enabled_p ())
4104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4105 "use not simple.\n");
4106 return false;
4107 }
4108
4109 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
4110 change the number of elements or the vector size. */
4111 if ((CONVERT_EXPR_CODE_P (code)
4112 || code == VIEW_CONVERT_EXPR)
4113 && (!vectype_in
4114 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4115 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4116 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4117 return false;
4118
4119 /* We do not handle bit-precision changes. */
4120 if ((CONVERT_EXPR_CODE_P (code)
4121 || code == VIEW_CONVERT_EXPR)
4122 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4123 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4124 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4125 || ((TYPE_PRECISION (TREE_TYPE (op))
4126 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4127 /* But a conversion that does not change the bit-pattern is ok. */
4128 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4129 > TYPE_PRECISION (TREE_TYPE (op)))
4130 && TYPE_UNSIGNED (TREE_TYPE (op))))
4131 {
4132 if (dump_enabled_p ())
4133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4134 "type conversion to/from bit-precision "
4135 "unsupported.\n");
4136 return false;
4137 }
4138
4139 if (!vec_stmt) /* transformation not required. */
4140 {
4141 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4142 if (dump_enabled_p ())
4143 dump_printf_loc (MSG_NOTE, vect_location,
4144 "=== vectorizable_assignment ===\n");
4145 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4146 return true;
4147 }
4148
4149 /** Transform. **/
4150 if (dump_enabled_p ())
4151 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4152
4153 /* Handle def. */
4154 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4155
4156 /* Handle use. */
4157 for (j = 0; j < ncopies; j++)
4158 {
4159 /* Handle uses. */
4160 if (j == 0)
4161 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4162 else
4163 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4164
4165 /* Arguments are ready. create the new vector stmt. */
4166 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4167 {
4168 if (CONVERT_EXPR_CODE_P (code)
4169 || code == VIEW_CONVERT_EXPR)
4170 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4171 new_stmt = gimple_build_assign (vec_dest, vop);
4172 new_temp = make_ssa_name (vec_dest, new_stmt);
4173 gimple_assign_set_lhs (new_stmt, new_temp);
4174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4175 if (slp_node)
4176 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4177 }
4178
4179 if (slp_node)
4180 continue;
4181
4182 if (j == 0)
4183 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4184 else
4185 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4186
4187 prev_stmt_info = vinfo_for_stmt (new_stmt);
4188 }
4189
4190 vec_oprnds.release ();
4191 return true;
4192 }
4193
4194
4195 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4196 either as shift by a scalar or by a vector. */
4197
4198 bool
4199 vect_supportable_shift (enum tree_code code, tree scalar_type)
4200 {
4201
4202 machine_mode vec_mode;
4203 optab optab;
4204 int icode;
4205 tree vectype;
4206
4207 vectype = get_vectype_for_scalar_type (scalar_type);
4208 if (!vectype)
4209 return false;
4210
4211 optab = optab_for_tree_code (code, vectype, optab_scalar);
4212 if (!optab
4213 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4214 {
4215 optab = optab_for_tree_code (code, vectype, optab_vector);
4216 if (!optab
4217 || (optab_handler (optab, TYPE_MODE (vectype))
4218 == CODE_FOR_nothing))
4219 return false;
4220 }
4221
4222 vec_mode = TYPE_MODE (vectype);
4223 icode = (int) optab_handler (optab, vec_mode);
4224 if (icode == CODE_FOR_nothing)
4225 return false;
4226
4227 return true;
4228 }
4229
4230
4231 /* Function vectorizable_shift.
4232
4233 Check if STMT performs a shift operation that can be vectorized.
4234 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4235 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4236 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4237
4238 static bool
4239 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4240 gimple *vec_stmt, slp_tree slp_node)
4241 {
4242 tree vec_dest;
4243 tree scalar_dest;
4244 tree op0, op1 = NULL;
4245 tree vec_oprnd1 = NULL_TREE;
4246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4247 tree vectype;
4248 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4249 enum tree_code code;
4250 machine_mode vec_mode;
4251 tree new_temp;
4252 optab optab;
4253 int icode;
4254 machine_mode optab_op2_mode;
4255 tree def;
4256 gimple def_stmt;
4257 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4258 gimple new_stmt = NULL;
4259 stmt_vec_info prev_stmt_info;
4260 int nunits_in;
4261 int nunits_out;
4262 tree vectype_out;
4263 tree op1_vectype;
4264 int ncopies;
4265 int j, i;
4266 vec<tree> vec_oprnds0 = vNULL;
4267 vec<tree> vec_oprnds1 = vNULL;
4268 tree vop0, vop1;
4269 unsigned int k;
4270 bool scalar_shift_arg = true;
4271 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4272 int vf;
4273
4274 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4275 return false;
4276
4277 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4278 return false;
4279
4280 /* Is STMT a vectorizable binary/unary operation? */
4281 if (!is_gimple_assign (stmt))
4282 return false;
4283
4284 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4285 return false;
4286
4287 code = gimple_assign_rhs_code (stmt);
4288
4289 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4290 || code == RROTATE_EXPR))
4291 return false;
4292
4293 scalar_dest = gimple_assign_lhs (stmt);
4294 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4295 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4296 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4297 {
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "bit-precision shifts not supported.\n");
4301 return false;
4302 }
4303
4304 op0 = gimple_assign_rhs1 (stmt);
4305 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4306 &def_stmt, &def, &dt[0], &vectype))
4307 {
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4310 "use not simple.\n");
4311 return false;
4312 }
4313 /* If op0 is an external or constant def use a vector type with
4314 the same size as the output vector type. */
4315 if (!vectype)
4316 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4317 if (vec_stmt)
4318 gcc_assert (vectype);
4319 if (!vectype)
4320 {
4321 if (dump_enabled_p ())
4322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4323 "no vectype for scalar type\n");
4324 return false;
4325 }
4326
4327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4328 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4329 if (nunits_out != nunits_in)
4330 return false;
4331
4332 op1 = gimple_assign_rhs2 (stmt);
4333 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4334 &def, &dt[1], &op1_vectype))
4335 {
4336 if (dump_enabled_p ())
4337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4338 "use not simple.\n");
4339 return false;
4340 }
4341
4342 if (loop_vinfo)
4343 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4344 else
4345 vf = 1;
4346
4347 /* Multiple types in SLP are handled by creating the appropriate number of
4348 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4349 case of SLP. */
4350 if (slp_node || PURE_SLP_STMT (stmt_info))
4351 ncopies = 1;
4352 else
4353 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4354
4355 gcc_assert (ncopies >= 1);
4356
4357 /* Determine whether the shift amount is a vector, or scalar. If the
4358 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4359
4360 if (dt[1] == vect_internal_def && !slp_node)
4361 scalar_shift_arg = false;
4362 else if (dt[1] == vect_constant_def
4363 || dt[1] == vect_external_def
4364 || dt[1] == vect_internal_def)
4365 {
4366 /* In SLP, we need to check whether the shift count is the same
4367 for all the stmts; in loops, a constant or invariant shift
4368 count is always a scalar shift. */
4369 if (slp_node)
4370 {
4371 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4372 gimple slpstmt;
4373
4374 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4375 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4376 scalar_shift_arg = false;
4377 }
4378 }
4379 else
4380 {
4381 if (dump_enabled_p ())
4382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4383 "operand mode requires invariant argument.\n");
4384 return false;
4385 }
4386
4387 /* Vector shifted by vector. */
4388 if (!scalar_shift_arg)
4389 {
4390 optab = optab_for_tree_code (code, vectype, optab_vector);
4391 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_NOTE, vect_location,
4393 "vector/vector shift/rotate found.\n");
4394
4395 if (!op1_vectype)
4396 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4397 if (op1_vectype == NULL_TREE
4398 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4399 {
4400 if (dump_enabled_p ())
4401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4402 "unusable type for last operand in"
4403 " vector/vector shift/rotate.\n");
4404 return false;
4405 }
4406 }
4407 /* See if the machine has a vector shifted by scalar insn and if not
4408 then see if it has a vector shifted by vector insn. */
4409 else
4410 {
4411 optab = optab_for_tree_code (code, vectype, optab_scalar);
4412 if (optab
4413 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4414 {
4415 if (dump_enabled_p ())
4416 dump_printf_loc (MSG_NOTE, vect_location,
4417 "vector/scalar shift/rotate found.\n");
4418 }
4419 else
4420 {
4421 optab = optab_for_tree_code (code, vectype, optab_vector);
4422 if (optab
4423 && (optab_handler (optab, TYPE_MODE (vectype))
4424 != CODE_FOR_nothing))
4425 {
4426 scalar_shift_arg = false;
4427
4428 if (dump_enabled_p ())
4429 dump_printf_loc (MSG_NOTE, vect_location,
4430 "vector/vector shift/rotate found.\n");
4431
4432 /* Unlike the other binary operators, shifts/rotates have
4433 an rhs of type int rather than the same type as the lhs,
4434 so make sure the scalar has the right type if we are
4435 dealing with vectors of long long/long/short/char. */
4436 if (dt[1] == vect_constant_def)
4437 op1 = fold_convert (TREE_TYPE (vectype), op1);
4438 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4439 TREE_TYPE (op1)))
4440 {
4441 if (slp_node
4442 && TYPE_MODE (TREE_TYPE (vectype))
4443 != TYPE_MODE (TREE_TYPE (op1)))
4444 {
4445 if (dump_enabled_p ())
4446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4447 "unusable type for last operand in"
4448 " vector/vector shift/rotate.\n");
4449 return false;
4450 }
4451 if (vec_stmt && !slp_node)
4452 {
4453 op1 = fold_convert (TREE_TYPE (vectype), op1);
4454 op1 = vect_init_vector (stmt, op1,
4455 TREE_TYPE (vectype), NULL);
4456 }
4457 }
4458 }
4459 }
4460 }
4461
4462 /* Supportable by target? */
4463 if (!optab)
4464 {
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4467 "no optab.\n");
4468 return false;
4469 }
4470 vec_mode = TYPE_MODE (vectype);
4471 icode = (int) optab_handler (optab, vec_mode);
4472 if (icode == CODE_FOR_nothing)
4473 {
4474 if (dump_enabled_p ())
4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4476 "op not supported by target.\n");
4477 /* Check only during analysis. */
4478 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4479 || (vf < vect_min_worthwhile_factor (code)
4480 && !vec_stmt))
4481 return false;
4482 if (dump_enabled_p ())
4483 dump_printf_loc (MSG_NOTE, vect_location,
4484 "proceeding using word mode.\n");
4485 }
4486
4487 /* Worthwhile without SIMD support? Check only during analysis. */
4488 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4489 && vf < vect_min_worthwhile_factor (code)
4490 && !vec_stmt)
4491 {
4492 if (dump_enabled_p ())
4493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4494 "not worthwhile without SIMD support.\n");
4495 return false;
4496 }
4497
4498 if (!vec_stmt) /* transformation not required. */
4499 {
4500 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_NOTE, vect_location,
4503 "=== vectorizable_shift ===\n");
4504 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4505 return true;
4506 }
4507
4508 /** Transform. **/
4509
4510 if (dump_enabled_p ())
4511 dump_printf_loc (MSG_NOTE, vect_location,
4512 "transform binary/unary operation.\n");
4513
4514 /* Handle def. */
4515 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4516
4517 prev_stmt_info = NULL;
4518 for (j = 0; j < ncopies; j++)
4519 {
4520 /* Handle uses. */
4521 if (j == 0)
4522 {
4523 if (scalar_shift_arg)
4524 {
4525 /* Vector shl and shr insn patterns can be defined with scalar
4526 operand 2 (shift operand). In this case, use constant or loop
4527 invariant op1 directly, without extending it to vector mode
4528 first. */
4529 optab_op2_mode = insn_data[icode].operand[2].mode;
4530 if (!VECTOR_MODE_P (optab_op2_mode))
4531 {
4532 if (dump_enabled_p ())
4533 dump_printf_loc (MSG_NOTE, vect_location,
4534 "operand 1 using scalar mode.\n");
4535 vec_oprnd1 = op1;
4536 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4537 vec_oprnds1.quick_push (vec_oprnd1);
4538 if (slp_node)
4539 {
4540 /* Store vec_oprnd1 for every vector stmt to be created
4541 for SLP_NODE. We check during the analysis that all
4542 the shift arguments are the same.
4543 TODO: Allow different constants for different vector
4544 stmts generated for an SLP instance. */
4545 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4546 vec_oprnds1.quick_push (vec_oprnd1);
4547 }
4548 }
4549 }
4550
4551 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4552 (a special case for certain kinds of vector shifts); otherwise,
4553 operand 1 should be of a vector type (the usual case). */
4554 if (vec_oprnd1)
4555 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4556 slp_node, -1);
4557 else
4558 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4559 slp_node, -1);
4560 }
4561 else
4562 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4563
4564 /* Arguments are ready. Create the new vector stmt. */
4565 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4566 {
4567 vop1 = vec_oprnds1[i];
4568 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4569 new_temp = make_ssa_name (vec_dest, new_stmt);
4570 gimple_assign_set_lhs (new_stmt, new_temp);
4571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4572 if (slp_node)
4573 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4574 }
4575
4576 if (slp_node)
4577 continue;
4578
4579 if (j == 0)
4580 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4581 else
4582 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4583 prev_stmt_info = vinfo_for_stmt (new_stmt);
4584 }
4585
4586 vec_oprnds0.release ();
4587 vec_oprnds1.release ();
4588
4589 return true;
4590 }
4591
4592
4593 /* Function vectorizable_operation.
4594
4595 Check if STMT performs a binary, unary or ternary operation that can
4596 be vectorized.
4597 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4598 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4599 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4600
4601 static bool
4602 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4603 gimple *vec_stmt, slp_tree slp_node)
4604 {
4605 tree vec_dest;
4606 tree scalar_dest;
4607 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4609 tree vectype;
4610 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4611 enum tree_code code;
4612 machine_mode vec_mode;
4613 tree new_temp;
4614 int op_type;
4615 optab optab;
4616 int icode;
4617 tree def;
4618 gimple def_stmt;
4619 enum vect_def_type dt[3]
4620 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4621 gimple new_stmt = NULL;
4622 stmt_vec_info prev_stmt_info;
4623 int nunits_in;
4624 int nunits_out;
4625 tree vectype_out;
4626 int ncopies;
4627 int j, i;
4628 vec<tree> vec_oprnds0 = vNULL;
4629 vec<tree> vec_oprnds1 = vNULL;
4630 vec<tree> vec_oprnds2 = vNULL;
4631 tree vop0, vop1, vop2;
4632 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4633 int vf;
4634
4635 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4636 return false;
4637
4638 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4639 return false;
4640
4641 /* Is STMT a vectorizable binary/unary operation? */
4642 if (!is_gimple_assign (stmt))
4643 return false;
4644
4645 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4646 return false;
4647
4648 code = gimple_assign_rhs_code (stmt);
4649
4650 /* For pointer addition, we should use the normal plus for
4651 the vector addition. */
4652 if (code == POINTER_PLUS_EXPR)
4653 code = PLUS_EXPR;
4654
4655 /* Support only unary, binary and ternary operations. */
4656 op_type = TREE_CODE_LENGTH (code);
4657 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4658 {
4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4661 "num. args = %d (not unary/binary/ternary op).\n",
4662 op_type);
4663 return false;
4664 }
4665
4666 scalar_dest = gimple_assign_lhs (stmt);
4667 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4668
4669 /* Most operations cannot handle bit-precision types without extra
4670 truncations. */
4671 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4672 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4673 /* Exceptions are bitwise binary operations. */
4674 && code != BIT_IOR_EXPR
4675 && code != BIT_XOR_EXPR
4676 && code != BIT_AND_EXPR)
4677 {
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4680 "bit-precision arithmetic not supported.\n");
4681 return false;
4682 }
4683
4684 op0 = gimple_assign_rhs1 (stmt);
4685 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4686 &def_stmt, &def, &dt[0], &vectype))
4687 {
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4690 "use not simple.\n");
4691 return false;
4692 }
4693 /* If op0 is an external or constant def use a vector type with
4694 the same size as the output vector type. */
4695 if (!vectype)
4696 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4697 if (vec_stmt)
4698 gcc_assert (vectype);
4699 if (!vectype)
4700 {
4701 if (dump_enabled_p ())
4702 {
4703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4704 "no vectype for scalar type ");
4705 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4706 TREE_TYPE (op0));
4707 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4708 }
4709
4710 return false;
4711 }
4712
4713 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4714 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4715 if (nunits_out != nunits_in)
4716 return false;
4717
4718 if (op_type == binary_op || op_type == ternary_op)
4719 {
4720 op1 = gimple_assign_rhs2 (stmt);
4721 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4722 &def, &dt[1]))
4723 {
4724 if (dump_enabled_p ())
4725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4726 "use not simple.\n");
4727 return false;
4728 }
4729 }
4730 if (op_type == ternary_op)
4731 {
4732 op2 = gimple_assign_rhs3 (stmt);
4733 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4734 &def, &dt[2]))
4735 {
4736 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4738 "use not simple.\n");
4739 return false;
4740 }
4741 }
4742
4743 if (loop_vinfo)
4744 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4745 else
4746 vf = 1;
4747
4748 /* Multiple types in SLP are handled by creating the appropriate number of
4749 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4750 case of SLP. */
4751 if (slp_node || PURE_SLP_STMT (stmt_info))
4752 ncopies = 1;
4753 else
4754 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4755
4756 gcc_assert (ncopies >= 1);
4757
4758 /* Shifts are handled in vectorizable_shift (). */
4759 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4760 || code == RROTATE_EXPR)
4761 return false;
4762
4763 /* Supportable by target? */
4764
4765 vec_mode = TYPE_MODE (vectype);
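  /* MULT_HIGHPART_EXPR is checked via can_mult_highpart_p rather than an
     optab lookup; LAST_INSN_CODE serves only as a "supported" marker for
     the icode checks below.  */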
4766 if (code == MULT_HIGHPART_EXPR)
4767 {
4768 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4769 icode = LAST_INSN_CODE;
4770 else
4771 icode = CODE_FOR_nothing;
4772 }
4773 else
4774 {
4775 optab = optab_for_tree_code (code, vectype, optab_default);
4776 if (!optab)
4777 {
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4780 "no optab.\n");
4781 return false;
4782 }
4783 icode = (int) optab_handler (optab, vec_mode);
4784 }
4785
4786 if (icode == CODE_FOR_nothing)
4787 {
4788 if (dump_enabled_p ())
4789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4790 "op not supported by target.\n");
4791 /* Check only during analysis. */
4792 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4793 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4794 return false;
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_NOTE, vect_location,
4797 "proceeding using word mode.\n");
4798 }
4799
4800 /* Worthwhile without SIMD support? Check only during analysis. */
4801 if (!VECTOR_MODE_P (vec_mode)
4802 && !vec_stmt
4803 && vf < vect_min_worthwhile_factor (code))
4804 {
4805 if (dump_enabled_p ())
4806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4807 "not worthwhile without SIMD support.\n");
4808 return false;
4809 }
4810
4811 if (!vec_stmt) /* transformation not required. */
4812 {
4813 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4814 if (dump_enabled_p ())
4815 dump_printf_loc (MSG_NOTE, vect_location,
4816 "=== vectorizable_operation ===\n");
4817 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4818 return true;
4819 }
4820
4821 /** Transform. **/
4822
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_NOTE, vect_location,
4825 "transform binary/unary operation.\n");
4826
4827 /* Handle def. */
4828 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4829
4830 /* In case the vectorization factor (VF) is bigger than the number
4831 of elements that we can fit in a vectype (nunits), we have to generate
4832 more than one vector stmt - i.e - we need to "unroll" the
4833 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4834 from one copy of the vector stmt to the next, in the field
4835 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4836 stages to find the correct vector defs to be used when vectorizing
4837 stmts that use the defs of the current stmt. The example below
4838 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4839 we need to create 4 vectorized stmts):
4840
4841 before vectorization:
4842 RELATED_STMT VEC_STMT
4843 S1: x = memref - -
4844 S2: z = x + 1 - -
4845
4846 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4847 there):
4848 RELATED_STMT VEC_STMT
4849 VS1_0: vx0 = memref0 VS1_1 -
4850 VS1_1: vx1 = memref1 VS1_2 -
4851 VS1_2: vx2 = memref2 VS1_3 -
4852 VS1_3: vx3 = memref3 - -
4853 S1: x = load - VS1_0
4854 S2: z = x + 1 - -
4855
4856 step2: vectorize stmt S2 (done here):
4857 To vectorize stmt S2 we first need to find the relevant vector
4858 def for the first operand 'x'. This is, as usual, obtained from
4859 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4860 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4861 relevant vector def 'vx0'. Having found 'vx0' we can generate
4862 the vector stmt VS2_0, and as usual, record it in the
4863 STMT_VINFO_VEC_STMT of stmt S2.
4864 When creating the second copy (VS2_1), we obtain the relevant vector
4865 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4866 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4867 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4868 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4869 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4870 chain of stmts and pointers:
4871 RELATED_STMT VEC_STMT
4872 VS1_0: vx0 = memref0 VS1_1 -
4873 VS1_1: vx1 = memref1 VS1_2 -
4874 VS1_2: vx2 = memref2 VS1_3 -
4875 VS1_3: vx3 = memref3 - -
4876 S1: x = load - VS1_0
4877 VS2_0: vz0 = vx0 + v1 VS2_1 -
4878 VS2_1: vz1 = vx1 + v1 VS2_2 -
4879 VS2_2: vz2 = vx2 + v1 VS2_3 -
4880 VS2_3: vz3 = vx3 + v1 - -
4881 S2: z = x + 1 - VS2_0 */
4882
4883 prev_stmt_info = NULL;
4884 for (j = 0; j < ncopies; j++)
4885 {
4886 /* Handle uses. */
4887 if (j == 0)
4888 {
4889 if (op_type == binary_op || op_type == ternary_op)
4890 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4891 slp_node, -1);
4892 else
4893 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4894 slp_node, -1);
4895 if (op_type == ternary_op)
4896 {
4897 vec_oprnds2.create (1);
4898 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4899 stmt,
4900 NULL));
4901 }
4902 }
4903 else
4904 {
4905 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4906 if (op_type == ternary_op)
4907 {
4908 tree vec_oprnd = vec_oprnds2.pop ();
4909 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4910 vec_oprnd));
4911 }
4912 }
4913
4914 /* Arguments are ready. Create the new vector stmt. */
4915 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4916 {
4917 vop1 = ((op_type == binary_op || op_type == ternary_op)
4918 ? vec_oprnds1[i] : NULL_TREE);
4919 vop2 = ((op_type == ternary_op)
4920 ? vec_oprnds2[i] : NULL_TREE);
4921 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4922 new_temp = make_ssa_name (vec_dest, new_stmt);
4923 gimple_assign_set_lhs (new_stmt, new_temp);
4924 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4925 if (slp_node)
4926 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4927 }
4928
4929 if (slp_node)
4930 continue;
4931
4932 if (j == 0)
4933 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4934 else
4935 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4936 prev_stmt_info = vinfo_for_stmt (new_stmt);
4937 }
4938
4939 vec_oprnds0.release ();
4940 vec_oprnds1.release ();
4941 vec_oprnds2.release ();
4942
4943 return true;
4944 }
4945
4946 /* A helper function to ensure data reference DR's base alignment
4947 for STMT_INFO. */
4948
4949 static void
4950 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4951 {
4952 if (!dr->aux)
4953 return;
4954
4955 if (((dataref_aux *)dr->aux)->base_misaligned)
4956 {
4957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4958 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4959
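      /* Declarations visible to the symbol table must have their alignment
	 increased through the symtab node; purely local declarations can be
	 realigned directly.  */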
4960 if (decl_in_symtab_p (base_decl))
4961 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4962 else
4963 {
4964 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4965 DECL_USER_ALIGN (base_decl) = 1;
4966 }
4967 ((dataref_aux *)dr->aux)->base_misaligned = false;
4968 }
4969 }
4970
4971
4972 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4973 reversal of the vector elements. If that is impossible to do,
4974 returns NULL. */
4975
4976 static tree
4977 perm_mask_for_reverse (tree vectype)
4978 {
4979 int i, nunits;
4980 unsigned char *sel;
4981
4982 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4983 sel = XALLOCAVEC (unsigned char, nunits);
4984
4985 for (i = 0; i < nunits; ++i)
4986 sel[i] = nunits - 1 - i;
4987
4988 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4989 return NULL_TREE;
4990 return vect_gen_perm_mask_checked (vectype, sel);
4991 }
4992
4993 /* Function vectorizable_store.
4994
4995 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4996 can be vectorized.
4997 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4998 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4999 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5000
5001 static bool
5002 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5003 slp_tree slp_node)
5004 {
5005 tree scalar_dest;
5006 tree data_ref;
5007 tree op;
5008 tree vec_oprnd = NULL_TREE;
5009 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5010 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5011 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5012 tree elem_type;
5013 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5014 struct loop *loop = NULL;
5015 machine_mode vec_mode;
5016 tree dummy;
5017 enum dr_alignment_support alignment_support_scheme;
5018 tree def;
5019 gimple def_stmt;
5020 enum vect_def_type dt;
5021 stmt_vec_info prev_stmt_info = NULL;
5022 tree dataref_ptr = NULL_TREE;
5023 tree dataref_offset = NULL_TREE;
5024 gimple ptr_incr = NULL;
5025 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5026 int ncopies;
5027 int j;
5028 gimple next_stmt, first_stmt = NULL;
5029 bool grouped_store = false;
5030 bool store_lanes_p = false;
5031 unsigned int group_size, i;
5032 vec<tree> dr_chain = vNULL;
5033 vec<tree> oprnds = vNULL;
5034 vec<tree> result_chain = vNULL;
5035 bool inv_p;
5036 bool negative = false;
5037 tree offset = NULL_TREE;
5038 vec<tree> vec_oprnds = vNULL;
5039 bool slp = (slp_node != NULL);
5040 unsigned int vec_num;
5041 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5042 tree aggr_type;
5043
5044 if (loop_vinfo)
5045 loop = LOOP_VINFO_LOOP (loop_vinfo);
5046
5047 /* Multiple types in SLP are handled by creating the appropriate number of
5048 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5049 case of SLP. */
5050 if (slp || PURE_SLP_STMT (stmt_info))
5051 ncopies = 1;
5052 else
5053 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5054
5055 gcc_assert (ncopies >= 1);
5056
5057 /* FORNOW. This restriction should be relaxed. */
5058 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5059 {
5060 if (dump_enabled_p ())
5061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5062 "multiple types in nested loop.\n");
5063 return false;
5064 }
5065
5066 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5067 return false;
5068
5069 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5070 return false;
5071
5072 /* Is vectorizable store? */
5073
5074 if (!is_gimple_assign (stmt))
5075 return false;
5076
5077 scalar_dest = gimple_assign_lhs (stmt);
5078 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5079 && is_pattern_stmt_p (stmt_info))
5080 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5081 if (TREE_CODE (scalar_dest) != ARRAY_REF
5082 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5083 && TREE_CODE (scalar_dest) != INDIRECT_REF
5084 && TREE_CODE (scalar_dest) != COMPONENT_REF
5085 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5086 && TREE_CODE (scalar_dest) != REALPART_EXPR
5087 && TREE_CODE (scalar_dest) != MEM_REF)
5088 return false;
5089
5090 gcc_assert (gimple_assign_single_p (stmt));
5091 op = gimple_assign_rhs1 (stmt);
5092 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5093 &def, &dt))
5094 {
5095 if (dump_enabled_p ())
5096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5097 "use not simple.\n");
5098 return false;
5099 }
5100
5101 elem_type = TREE_TYPE (vectype);
5102 vec_mode = TYPE_MODE (vectype);
5103
5104 /* FORNOW. In some cases we can vectorize even if the data type is not
5105 supported (e.g. array initialization with 0). */
5106 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5107 return false;
5108
5109 if (!STMT_VINFO_DATA_REF (stmt_info))
5110 return false;
5111
5112 if (!STMT_VINFO_STRIDED_P (stmt_info))
5113 {
5114 negative =
5115 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5116 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5117 size_zero_node) < 0;
5118 if (negative && ncopies > 1)
5119 {
5120 if (dump_enabled_p ())
5121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5122 "multiple types with negative step.\n");
5123 return false;
5124 }
5125 if (negative)
5126 {
5127 gcc_assert (!grouped_store);
5128 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5129 if (alignment_support_scheme != dr_aligned
5130 && alignment_support_scheme != dr_unaligned_supported)
5131 {
5132 if (dump_enabled_p ())
5133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5134 "negative step but alignment required.\n");
5135 return false;
5136 }
5137 if (dt != vect_constant_def
5138 && dt != vect_external_def
5139 && !perm_mask_for_reverse (vectype))
5140 {
5141 if (dump_enabled_p ())
5142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5143 "negative step and reversing not supported.\n");
5144 return false;
5145 }
5146 }
5147 }
5148
5149 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5150 {
5151 grouped_store = true;
5152 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5153 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5154 if (!slp
5155 && !PURE_SLP_STMT (stmt_info)
5156 && !STMT_VINFO_STRIDED_P (stmt_info))
5157 {
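	  /* Prefer a store-lanes instruction for this group size if the
	     target provides one; otherwise fall back to permute-based
	     grouped stores, and give up if neither is supported.  */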
5158 if (vect_store_lanes_supported (vectype, group_size))
5159 store_lanes_p = true;
5160 else if (!vect_grouped_store_supported (vectype, group_size))
5161 return false;
5162 }
5163
5164 if (STMT_VINFO_STRIDED_P (stmt_info)
5165 && (slp || PURE_SLP_STMT (stmt_info))
5166 && (group_size > nunits
5167 || nunits % group_size != 0))
5168 {
5169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5170 "unhandled strided group store\n");
5171 return false;
5172 }
5173
5174 if (first_stmt == stmt)
5175 {
5176 /* STMT is the leader of the group. Check the operands of all the
5177 stmts of the group. */
5178 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5179 while (next_stmt)
5180 {
5181 gcc_assert (gimple_assign_single_p (next_stmt));
5182 op = gimple_assign_rhs1 (next_stmt);
5183 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5184 &def_stmt, &def, &dt))
5185 {
5186 if (dump_enabled_p ())
5187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5188 "use not simple.\n");
5189 return false;
5190 }
5191 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5192 }
5193 }
5194 }
5195
5196 if (!vec_stmt) /* transformation not required. */
5197 {
5198 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5199 /* The SLP costs are calculated during SLP analysis. */
5200 if (!PURE_SLP_STMT (stmt_info))
5201 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5202 NULL, NULL, NULL);
5203 return true;
5204 }
5205
5206 /** Transform. **/
5207
5208 ensure_base_align (stmt_info, dr);
5209
5210 if (grouped_store)
5211 {
5212 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5213 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5214
5215 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5216
5217 /* FORNOW */
5218 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5219
5220 /* We vectorize all the stmts of the interleaving group when we
5221 reach the last stmt in the group. */
5222 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5223 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5224 && !slp)
5225 {
5226 *vec_stmt = NULL;
5227 return true;
5228 }
5229
5230 if (slp)
5231 {
5232 grouped_store = false;
5233 /* VEC_NUM is the number of vect stmts to be created for this
5234 group. */
5235 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5236 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5237 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5238 op = gimple_assign_rhs1 (first_stmt);
5239 }
5240 else
5241 /* VEC_NUM is the number of vect stmts to be created for this
5242 group. */
5243 vec_num = group_size;
5244 }
5245 else
5246 {
5247 first_stmt = stmt;
5248 first_dr = dr;
5249 group_size = vec_num = 1;
5250 }
5251
5252 if (dump_enabled_p ())
5253 dump_printf_loc (MSG_NOTE, vect_location,
5254 "transform store. ncopies = %d\n", ncopies);
5255
5256 if (STMT_VINFO_STRIDED_P (stmt_info))
5257 {
5258 gimple_stmt_iterator incr_gsi;
5259 bool insert_after;
5260 gimple incr;
5261 tree offvar;
5262 tree ivstep;
5263 tree running_off;
5264 gimple_seq stmts = NULL;
5265 tree stride_base, stride_step, alias_off;
5266 tree vec_oprnd;
5267
5268 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5269
5270 stride_base
5271 = fold_build_pointer_plus
5272 (unshare_expr (DR_BASE_ADDRESS (dr)),
5273 size_binop (PLUS_EXPR,
5274 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5275 convert_to_ptrofftype (DR_INIT(dr))));
5276 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5277
5278 /* For a store with loop-invariant (but other than power-of-2)
5279 stride (i.e. not a grouped access) like so:
5280
5281 for (i = 0; i < n; i += stride)
5282 array[i] = ...;
5283
5284 we generate a new induction variable and new stores from
5285 the components of the (vectorized) rhs:
5286
5287 for (j = 0; ; j += VF*stride)
5288 vectemp = ...;
5289 tmp1 = vectemp[0];
5290 array[j] = tmp1;
5291 tmp2 = vectemp[1];
5292 array[j + stride] = tmp2;
5293 ...
5294 */
5295
5296 unsigned nstores = nunits;
5297 tree ltype = elem_type;
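      /* With SLP, extract and store NUNITS / GROUP_SIZE chunks of
	 GROUP_SIZE elements from each vector instead of NUNITS
	 individual scalars.  */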
5298 if (slp)
5299 {
5300 nstores = nunits / group_size;
5301 if (group_size < nunits)
5302 ltype = build_vector_type (elem_type, group_size);
5303 else
5304 ltype = vectype;
5305 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5306 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5307 }
5308
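      /* The induction variable advances by the scalar stride multiplied by
	 the number of stores emitted per vectorized loop iteration.  */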
5309 ivstep = stride_step;
5310 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5311 build_int_cst (TREE_TYPE (ivstep),
5312 ncopies * nstores));
5313
5314 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5315
5316 create_iv (stride_base, ivstep, NULL,
5317 loop, &incr_gsi, insert_after,
5318 &offvar, NULL);
5319 incr = gsi_stmt (incr_gsi);
5320 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5321
5322 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5323 if (stmts)
5324 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5325
5326 prev_stmt_info = NULL;
5327 running_off = offvar;
5328 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
5329 for (j = 0; j < ncopies; j++)
5330 {
5331 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5332 and first_stmt == stmt. */
5333 if (j == 0)
5334 {
5335 if (slp)
5336 {
5337 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5338 slp_node, -1);
5339 vec_oprnd = vec_oprnds[0];
5340 }
5341 else
5342 vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
5343 }
5344 else
5345 {
5346 if (slp)
5347 vec_oprnd = vec_oprnds[j];
5348 else
5349 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5350 }
5351
5352 for (i = 0; i < nstores; i++)
5353 {
5354 tree newref, newoff;
5355 gimple incr, assign;
5356 tree size = TYPE_SIZE (ltype);
5357 /* Extract the i'th component. */
5358 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
5359 size);
5360 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5361 size, pos);
5362
5363 elem = force_gimple_operand_gsi (gsi, elem, true,
5364 NULL_TREE, true,
5365 GSI_SAME_STMT);
5366
5367 newref = build2 (MEM_REF, ltype,
5368 running_off, alias_off);
5369
5370 /* And store it to *running_off. */
5371 assign = gimple_build_assign (newref, elem);
5372 vect_finish_stmt_generation (stmt, assign, gsi);
5373
5374 newoff = copy_ssa_name (running_off, NULL);
5375 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5376 running_off, stride_step);
5377 vect_finish_stmt_generation (stmt, incr, gsi);
5378
5379 running_off = newoff;
5380 if (j == 0 && i == 0)
5381 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5382 else
5383 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5384 prev_stmt_info = vinfo_for_stmt (assign);
5385 }
5386 }
5387 return true;
5388 }
5389
5390 dr_chain.create (group_size);
5391 oprnds.create (group_size);
5392
5393 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5394 gcc_assert (alignment_support_scheme);
5395 /* Targets with store-lane instructions must not require explicit
5396 realignment. */
5397 gcc_assert (!store_lanes_p
5398 || alignment_support_scheme == dr_aligned
5399 || alignment_support_scheme == dr_unaligned_supported);
5400
5401 if (negative)
5402 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5403
5404 if (store_lanes_p)
5405 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5406 else
5407 aggr_type = vectype;
5408
5409 /* In case the vectorization factor (VF) is bigger than the number
5410 of elements that we can fit in a vectype (nunits), we have to generate
5411 more than one vector stmt - i.e - we need to "unroll" the
5412 vector stmt by a factor VF/nunits. For more details see documentation in
5413 vect_get_vec_def_for_copy_stmt. */
5414
5415 /* In case of interleaving (non-unit grouped access):
5416
5417 S1: &base + 2 = x2
5418 S2: &base = x0
5419 S3: &base + 1 = x1
5420 S4: &base + 3 = x3
5421
5422 We create vectorized stores starting from base address (the access of the
5423 first stmt in the chain (S2 in the above example), when the last store stmt
5424 of the chain (S4) is reached:
5425
5426 VS1: &base = vx2
5427 VS2: &base + vec_size*1 = vx0
5428 VS3: &base + vec_size*2 = vx1
5429 VS4: &base + vec_size*3 = vx3
5430
5431 Then permutation statements are generated:
5432
5433 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5434 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5435 ...
5436
5437 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5438 (the order of the data-refs in the output of vect_permute_store_chain
5439 corresponds to the order of scalar stmts in the interleaving chain - see
5440 the documentation of vect_permute_store_chain()).
5441
5442 In case of both multiple types and interleaving, above vector stores and
5443 permutation stmts are created for every copy. The result vector stmts are
5444 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5445 STMT_VINFO_RELATED_STMT for the next copies.
5446 */
5447
5448 prev_stmt_info = NULL;
5449 for (j = 0; j < ncopies; j++)
5450 {
5451 gimple new_stmt;
5452
5453 if (j == 0)
5454 {
5455 if (slp)
5456 {
5457 /* Get vectorized arguments for SLP_NODE. */
5458 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5459 NULL, slp_node, -1);
5460
5461 vec_oprnd = vec_oprnds[0];
5462 }
5463 else
5464 {
5465 /* For interleaved stores we collect vectorized defs for all the
5466 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5467 used as an input to vect_permute_store_chain(), and OPRNDS as
5468 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5469
5470 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5471 OPRNDS are of size 1. */
5472 next_stmt = first_stmt;
5473 for (i = 0; i < group_size; i++)
5474 {
5475 /* Since gaps are not supported for interleaved stores,
5476 GROUP_SIZE is the exact number of stmts in the chain.
5477 Therefore, NEXT_STMT can't be NULL. In case there
5478 is no interleaving, GROUP_SIZE is 1, and only one
5479 iteration of the loop will be executed. */
5480 gcc_assert (next_stmt
5481 && gimple_assign_single_p (next_stmt));
5482 op = gimple_assign_rhs1 (next_stmt);
5483
5484 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5485 NULL);
5486 dr_chain.quick_push (vec_oprnd);
5487 oprnds.quick_push (vec_oprnd);
5488 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5489 }
5490 }
5491
5492 /* We should have caught mismatched types earlier. */
5493 gcc_assert (useless_type_conversion_p (vectype,
5494 TREE_TYPE (vec_oprnd)));
5495 bool simd_lane_access_p
5496 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5497 if (simd_lane_access_p
5498 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5499 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5500 && integer_zerop (DR_OFFSET (first_dr))
5501 && integer_zerop (DR_INIT (first_dr))
5502 && alias_sets_conflict_p (get_alias_set (aggr_type),
5503 get_alias_set (DR_REF (first_dr))))
5504 {
5505 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5506 dataref_offset = build_int_cst (reference_alias_ptr_type
5507 (DR_REF (first_dr)), 0);
5508 inv_p = false;
5509 }
5510 else
5511 dataref_ptr
5512 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5513 simd_lane_access_p ? loop : NULL,
5514 offset, &dummy, gsi, &ptr_incr,
5515 simd_lane_access_p, &inv_p);
5516 gcc_assert (bb_vinfo || !inv_p);
5517 }
5518 else
5519 {
5520 /* For interleaved stores we created vectorized defs for all the
5521 defs stored in OPRNDS in the previous iteration (previous copy).
5522 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5523 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5524 next copy.
5525 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5526 OPRNDS are of size 1. */
5527 for (i = 0; i < group_size; i++)
5528 {
5529 op = oprnds[i];
5530 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5531 &def, &dt);
5532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5533 dr_chain[i] = vec_oprnd;
5534 oprnds[i] = vec_oprnd;
5535 }
5536 if (dataref_offset)
5537 dataref_offset
5538 = int_const_binop (PLUS_EXPR, dataref_offset,
5539 TYPE_SIZE_UNIT (aggr_type));
5540 else
5541 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5542 TYPE_SIZE_UNIT (aggr_type));
5543 }
5544
5545 if (store_lanes_p)
5546 {
5547 tree vec_array;
5548
5549 /* Combine all the vectors into an array. */
5550 vec_array = create_vector_array (vectype, vec_num);
5551 for (i = 0; i < vec_num; i++)
5552 {
5553 vec_oprnd = dr_chain[i];
5554 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5555 }
5556
5557 /* Emit:
5558 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5559 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5560 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5561 gimple_call_set_lhs (new_stmt, data_ref);
5562 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5563 }
5564 else
5565 {
5566 new_stmt = NULL;
5567 if (grouped_store)
5568 {
5569 if (j == 0)
5570 result_chain.create (group_size);
5571 /* Permute. */
5572 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5573 &result_chain);
5574 }
5575
5576 next_stmt = first_stmt;
5577 for (i = 0; i < vec_num; i++)
5578 {
5579 unsigned align, misalign;
5580
5581 if (i > 0)
5582 /* Bump the vector pointer. */
5583 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5584 stmt, NULL_TREE);
5585
5586 if (slp)
5587 vec_oprnd = vec_oprnds[i];
5588 else if (grouped_store)
5589 /* For grouped stores vectorized defs are interleaved in
5590 vect_permute_store_chain(). */
5591 vec_oprnd = result_chain[i];
5592
5593 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5594 dataref_ptr,
5595 dataref_offset
5596 ? dataref_offset
5597 : build_int_cst (reference_alias_ptr_type
5598 (DR_REF (first_dr)), 0));
5599 align = TYPE_ALIGN_UNIT (vectype);
5600 if (aligned_access_p (first_dr))
5601 misalign = 0;
5602 else if (DR_MISALIGNMENT (first_dr) == -1)
5603 {
5604 TREE_TYPE (data_ref)
5605 = build_aligned_type (TREE_TYPE (data_ref),
5606 TYPE_ALIGN (elem_type));
5607 align = TYPE_ALIGN_UNIT (elem_type);
5608 misalign = 0;
5609 }
5610 else
5611 {
5612 TREE_TYPE (data_ref)
5613 = build_aligned_type (TREE_TYPE (data_ref),
5614 TYPE_ALIGN (elem_type));
5615 misalign = DR_MISALIGNMENT (first_dr);
5616 }
5617 if (dataref_offset == NULL_TREE
5618 && TREE_CODE (dataref_ptr) == SSA_NAME)
5619 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5620 misalign);
5621
5622 if (negative
5623 && dt != vect_constant_def
5624 && dt != vect_external_def)
5625 {
5626 tree perm_mask = perm_mask_for_reverse (vectype);
5627 tree perm_dest
5628 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5629 vectype);
5630 tree new_temp = make_ssa_name (perm_dest);
5631
5632 /* Generate the permute statement. */
5633 gimple perm_stmt
5634 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5635 vec_oprnd, perm_mask);
5636 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5637
5638 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5639 vec_oprnd = new_temp;
5640 }
5641
5642 /* Arguments are ready. Create the new vector stmt. */
5643 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5644 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5645
5646 if (slp)
5647 continue;
5648
5649 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5650 if (!next_stmt)
5651 break;
5652 }
5653 }
5654 if (!slp)
5655 {
5656 if (j == 0)
5657 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5658 else
5659 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5660 prev_stmt_info = vinfo_for_stmt (new_stmt);
5661 }
5662 }
5663
5664 dr_chain.release ();
5665 oprnds.release ();
5666 result_chain.release ();
5667 vec_oprnds.release ();
5668
5669 return true;
5670 }
5671
5672 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5673 VECTOR_CST mask. No checks are made that the target platform supports the
5674 mask, so callers may wish to test can_vec_perm_p separately, or use
5675 vect_gen_perm_mask_checked. */
5676
5677 tree
5678 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5679 {
5680 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5681 int i, nunits;
5682
5683 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5684
5685 mask_elt_type = lang_hooks.types.type_for_mode
5686 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5687 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5688
5689 mask_elts = XALLOCAVEC (tree, nunits);
5690 for (i = nunits - 1; i >= 0; i--)
5691 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5692 mask_vec = build_vector (mask_type, mask_elts);
5693
5694 return mask_vec;
5695 }
5696
5697 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5698 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5699
5700 tree
5701 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5702 {
5703 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5704 return vect_gen_perm_mask_any (vectype, sel);
5705 }
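
/* Editorial illustration (a sketch, not a use in the vectorizer itself):
   to obtain the element-reversing permutation of a 4-element vector type
   a caller could write

     unsigned char sel[4] = { 3, 2, 1, 0 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   which is valid only if the target's can_vec_perm_p hook accepts the
   reversal pattern - this is essentially what perm_mask_for_reverse does
   for the negative-step paths elsewhere in this file.  */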
5706
5707 /* Given vector variables X and Y that were generated for the scalar
5708 STMT, generate instructions to permute the vector elements of X and Y
5709 using the permutation mask MASK_VEC, insert them at *GSI and return the
5710 permuted vector variable. */
5711
5712 static tree
5713 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5714 gimple_stmt_iterator *gsi)
5715 {
5716 tree vectype = TREE_TYPE (x);
5717 tree perm_dest, data_ref;
5718 gimple perm_stmt;
5719
5720 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5721 data_ref = make_ssa_name (perm_dest);
5722
5723 /* Generate the permute statement. */
5724 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5725 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5726
5727 return data_ref;
5728 }
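
/* Editorial note on the statement shape emitted by permute_vec_elements:
   it is a single assignment of the form

     perm_dest_N = VEC_PERM_EXPR <X, Y, MASK_VEC>;

   where element i of the result is element MASK_VEC[i] of the
   concatenation of X and Y (indices 0 .. 2*nunits-1).  */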
5729
5730 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5731 inserting them on the loop's preheader edge. Returns true if we
5732 were successful in doing so (and thus STMT can then be moved),
5733 otherwise returns false. */
5734
5735 static bool
5736 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5737 {
5738 ssa_op_iter i;
5739 tree op;
5740 bool any = false;
5741
5742 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5743 {
5744 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5745 if (!gimple_nop_p (def_stmt)
5746 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5747 {
5748 /* Make sure we don't need to recurse. While we could do
5749 so in simple cases, for more complex use webs we don't
5750 have an easy way to preserve stmt order to fulfil
5751 dependencies within them. */
5752 tree op2;
5753 ssa_op_iter i2;
5754 if (gimple_code (def_stmt) == GIMPLE_PHI)
5755 return false;
5756 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5757 {
5758 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5759 if (!gimple_nop_p (def_stmt2)
5760 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5761 return false;
5762 }
5763 any = true;
5764 }
5765 }
5766
5767 if (!any)
5768 return true;
5769
5770 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5771 {
5772 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5773 if (!gimple_nop_p (def_stmt)
5774 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5775 {
5776 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5777 gsi_remove (&gsi, false);
5778 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5779 }
5780 }
5781
5782 return true;
5783 }
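
/* Editorial sketch for illustration (the SSA names are made up): if STMT is
   a loop-invariant load whose address is computed inside the loop, e.g.

     # inside LOOP
     p_1 = base_2 + 16;
     x_3 = *p_1;          <-- STMT

   and the definition of p_1 only uses values defined outside LOOP, then
   hoist_defs_of_uses moves the definition of p_1 to the preheader edge,
   after which the invariant-load handling in vectorizable_load below can
   evaluate STMT's right-hand side on the preheader as well.  */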
5784
5785 /* vectorizable_load.
5786
5787 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5788 can be vectorized.
5789 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5790 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5791 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
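
/* Editorial overview of the cases handled below (a summary, not normative):
   the transform phase first handles gather loads (STMT_VINFO_GATHER_P),
   then loads with a loop-invariant stride (STMT_VINFO_STRIDED_P), and
   finally grouped and contiguous loads, where the main per-copy loop emits
   either LOAD_LANES calls or ordinary vector loads with optional explicit
   realignment, reversal and invariant-load hoisting.  */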
5792
5793 static bool
5794 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5795 slp_tree slp_node, slp_instance slp_node_instance)
5796 {
5797 tree scalar_dest;
5798 tree vec_dest = NULL;
5799 tree data_ref = NULL;
5800 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5801 stmt_vec_info prev_stmt_info;
5802 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5803 struct loop *loop = NULL;
5804 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5805 bool nested_in_vect_loop = false;
5806 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5807 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5808 tree elem_type;
5809 tree new_temp;
5810 machine_mode mode;
5811 gimple new_stmt = NULL;
5812 tree dummy;
5813 enum dr_alignment_support alignment_support_scheme;
5814 tree dataref_ptr = NULL_TREE;
5815 tree dataref_offset = NULL_TREE;
5816 gimple ptr_incr = NULL;
5817 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5818 int ncopies;
5819 int i, j, group_size = -1, group_gap_adj;
5820 tree msq = NULL_TREE, lsq;
5821 tree offset = NULL_TREE;
5822 tree byte_offset = NULL_TREE;
5823 tree realignment_token = NULL_TREE;
5824 gphi *phi = NULL;
5825 vec<tree> dr_chain = vNULL;
5826 bool grouped_load = false;
5827 bool load_lanes_p = false;
5828 gimple first_stmt;
5829 bool inv_p;
5830 bool negative = false;
5831 bool compute_in_loop = false;
5832 struct loop *at_loop;
5833 int vec_num;
5834 bool slp = (slp_node != NULL);
5835 bool slp_perm = false;
5836 enum tree_code code;
5837 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5838 int vf;
5839 tree aggr_type;
5840 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5841 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5842 int gather_scale = 1;
5843 enum vect_def_type gather_dt = vect_unknown_def_type;
5844
5845 if (loop_vinfo)
5846 {
5847 loop = LOOP_VINFO_LOOP (loop_vinfo);
5848 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5849 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5850 }
5851 else
5852 vf = 1;
5853
5854 /* Multiple types in SLP are handled by creating the appropriate number of
5855 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5856 case of SLP. */
5857 if (slp || PURE_SLP_STMT (stmt_info))
5858 ncopies = 1;
5859 else
5860 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5861
5862 gcc_assert (ncopies >= 1);
5863
5864 /* FORNOW. This restriction should be relaxed. */
5865 if (nested_in_vect_loop && ncopies > 1)
5866 {
5867 if (dump_enabled_p ())
5868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5869 "multiple types in nested loop.\n");
5870 return false;
5871 }
5872
5873 /* Invalidate assumptions made by dependence analysis when vectorization
5874 on the unrolled body effectively re-orders stmts. */
5875 if (ncopies > 1
5876 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5877 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5878 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5879 {
5880 if (dump_enabled_p ())
5881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5882 "cannot perform implicit CSE when unrolling "
5883 "with negative dependence distance\n");
5884 return false;
5885 }
5886
5887 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5888 return false;
5889
5890 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5891 return false;
5892
5893 /* Is vectorizable load? */
5894 if (!is_gimple_assign (stmt))
5895 return false;
5896
5897 scalar_dest = gimple_assign_lhs (stmt);
5898 if (TREE_CODE (scalar_dest) != SSA_NAME)
5899 return false;
5900
5901 code = gimple_assign_rhs_code (stmt);
5902 if (code != ARRAY_REF
5903 && code != BIT_FIELD_REF
5904 && code != INDIRECT_REF
5905 && code != COMPONENT_REF
5906 && code != IMAGPART_EXPR
5907 && code != REALPART_EXPR
5908 && code != MEM_REF
5909 && TREE_CODE_CLASS (code) != tcc_declaration)
5910 return false;
5911
5912 if (!STMT_VINFO_DATA_REF (stmt_info))
5913 return false;
5914
5915 elem_type = TREE_TYPE (vectype);
5916 mode = TYPE_MODE (vectype);
5917
5918 /* FORNOW. In some cases can vectorize even if data-type not supported
5919 (e.g. - data copies). */
5920 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5921 {
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5924 "Aligned load, but unsupported type.\n");
5925 return false;
5926 }
5927
5928 /* Check if the load is a part of an interleaving chain. */
5929 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5930 {
5931 grouped_load = true;
5932 /* FORNOW */
5933 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5934
5935 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5936
5937 /* If this is single-element interleaving with an element distance
5938 that leaves unused vector loads around, punt - we would at least
5939 create very sub-optimal code in that case (and blow up memory,
5940 see PR65518). */
5941 if (first_stmt == stmt
5942 && !GROUP_NEXT_ELEMENT (stmt_info)
5943 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5944 {
5945 if (dump_enabled_p ())
5946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5947 "single-element interleaving not supported "
5948 "for not adjacent vector loads\n");
5949 return false;
5950 }
5951
5952 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
5953 slp_perm = true;
5954
5955 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5956 if (!slp
5957 && !PURE_SLP_STMT (stmt_info)
5958 && !STMT_VINFO_STRIDED_P (stmt_info))
5959 {
5960 if (vect_load_lanes_supported (vectype, group_size))
5961 load_lanes_p = true;
5962 else if (!vect_grouped_load_supported (vectype, group_size))
5963 return false;
5964 }
5965
5966 /* Invalidate assumptions made by dependence analysis when vectorization
5967 on the unrolled body effectively re-orders stmts. */
5968 if (!PURE_SLP_STMT (stmt_info)
5969 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5970 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5971 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5972 {
5973 if (dump_enabled_p ())
5974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5975 "cannot perform implicit CSE when performing "
5976 "group loads with negative dependence distance\n");
5977 return false;
5978 }
5979
5980 /* Similarly, when the stmt is a load that is both part of an SLP
5981 instance and a loop vectorized stmt via the same-dr mechanism,
5982 we have to give up. */
5983 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5984 && (STMT_SLP_TYPE (stmt_info)
5985 != STMT_SLP_TYPE (vinfo_for_stmt
5986 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5987 {
5988 if (dump_enabled_p ())
5989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5990 "conflicting SLP types for CSEd load\n");
5991 return false;
5992 }
5993 }
5994
5995
5996 if (STMT_VINFO_GATHER_P (stmt_info))
5997 {
5998 gimple def_stmt;
5999 tree def;
6000 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
6001 &gather_off, &gather_scale);
6002 gcc_assert (gather_decl);
6003 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
6004 &def_stmt, &def, &gather_dt,
6005 &gather_off_vectype))
6006 {
6007 if (dump_enabled_p ())
6008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6009 "gather index use not simple.\n");
6010 return false;
6011 }
6012 }
6013 else if (STMT_VINFO_STRIDED_P (stmt_info))
6014 {
6015 if ((grouped_load
6016 && (slp || PURE_SLP_STMT (stmt_info)))
6017 && (group_size > nunits
6018 || nunits % group_size != 0))
6019 {
6020 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6021 "unhandled strided group load\n");
6022 return false;
6023 }
6024 }
6025 else
6026 {
6027 negative = tree_int_cst_compare (nested_in_vect_loop
6028 ? STMT_VINFO_DR_STEP (stmt_info)
6029 : DR_STEP (dr),
6030 size_zero_node) < 0;
6031 if (negative && ncopies > 1)
6032 {
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035 "multiple types with negative step.\n");
6036 return false;
6037 }
6038
6039 if (negative)
6040 {
6041 if (grouped_load)
6042 {
6043 if (dump_enabled_p ())
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045 "negative step for group load not supported"
6046 "\n");
6047 return false;
6048 }
6049 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6050 if (alignment_support_scheme != dr_aligned
6051 && alignment_support_scheme != dr_unaligned_supported)
6052 {
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6055 "negative step but alignment required.\n");
6056 return false;
6057 }
6058 if (!perm_mask_for_reverse (vectype))
6059 {
6060 if (dump_enabled_p ())
6061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6062 "negative step and reversing not supported."
6063 "\n");
6064 return false;
6065 }
6066 }
6067 }
6068
6069 if (!vec_stmt) /* transformation not required. */
6070 {
6071 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6072 /* The SLP costs are calculated during SLP analysis. */
6073 if (!PURE_SLP_STMT (stmt_info))
6074 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6075 NULL, NULL, NULL);
6076 return true;
6077 }
6078
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_NOTE, vect_location,
6081 "transform load. ncopies = %d\n", ncopies);
6082
6083 /** Transform. **/
6084
6085 ensure_base_align (stmt_info, dr);
6086
6087 if (STMT_VINFO_GATHER_P (stmt_info))
6088 {
6089 tree vec_oprnd0 = NULL_TREE, op;
6090 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6091 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6092 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6093 edge pe = loop_preheader_edge (loop);
6094 gimple_seq seq;
6095 basic_block new_bb;
6096 enum { NARROW, NONE, WIDEN } modifier;
6097 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6098
6099 if (nunits == gather_off_nunits)
6100 modifier = NONE;
6101 else if (nunits == gather_off_nunits / 2)
6102 {
6103 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6104 modifier = WIDEN;
6105
6106 for (i = 0; i < gather_off_nunits; ++i)
6107 sel[i] = i | nunits;
6108
6109 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6110 }
6111 else if (nunits == gather_off_nunits * 2)
6112 {
6113 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6114 modifier = NARROW;
6115
6116 for (i = 0; i < nunits; ++i)
6117 sel[i] = i < gather_off_nunits
6118 ? i : i + nunits - gather_off_nunits;
6119
6120 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6121 ncopies *= 2;
6122 }
6123 else
6124 gcc_unreachable ();
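
/* Editorial note: MODIFIER describes how the offset vector relates to the
   data vector.  NONE means they have the same number of elements; WIDEN
   means the offset vector has twice as many elements as the data vector,
   so each data vector consumes half of an offset vector; NARROW means the
   data vector has twice as many elements, so two gather results are
   combined per data vector and NCOPIES is doubled.  */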
6125
6126 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6127 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6128 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6129 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6130 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6131 scaletype = TREE_VALUE (arglist);
6132 gcc_checking_assert (types_compatible_p (srctype, rettype));
6133
6134 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6135
6136 ptr = fold_convert (ptrtype, gather_base);
6137 if (!is_gimple_min_invariant (ptr))
6138 {
6139 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6140 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6141 gcc_assert (!new_bb);
6142 }
6143
6144 /* Currently we support only unconditional gather loads,
6145 so mask should be all ones. */
6146 if (TREE_CODE (masktype) == INTEGER_TYPE)
6147 mask = build_int_cst (masktype, -1);
6148 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6149 {
6150 mask = build_int_cst (TREE_TYPE (masktype), -1);
6151 mask = build_vector_from_val (masktype, mask);
6152 mask = vect_init_vector (stmt, mask, masktype, NULL);
6153 }
6154 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6155 {
6156 REAL_VALUE_TYPE r;
6157 long tmp[6];
6158 for (j = 0; j < 6; ++j)
6159 tmp[j] = -1;
6160 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6161 mask = build_real (TREE_TYPE (masktype), r);
6162 mask = build_vector_from_val (masktype, mask);
6163 mask = vect_init_vector (stmt, mask, masktype, NULL);
6164 }
6165 else
6166 gcc_unreachable ();
6167
6168 scale = build_int_cst (scaletype, gather_scale);
6169
6170 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6171 merge = build_int_cst (TREE_TYPE (rettype), 0);
6172 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6173 {
6174 REAL_VALUE_TYPE r;
6175 long tmp[6];
6176 for (j = 0; j < 6; ++j)
6177 tmp[j] = 0;
6178 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6179 merge = build_real (TREE_TYPE (rettype), r);
6180 }
6181 else
6182 gcc_unreachable ();
6183 merge = build_vector_from_val (rettype, merge);
6184 merge = vect_init_vector (stmt, merge, rettype, NULL);
6185
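/* Editorial note: each copy below ends up as a call of the form

     vect_dest = GATHER_DECL (merge, ptr, idx, mask, scale);

   where IDX is the (possibly permuted and VIEW_CONVERTed) vector of
   offsets and MERGE supplies the values for masked-off lanes (all lanes
   are active here, since only unconditional gathers are supported).  */
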
6186 prev_stmt_info = NULL;
6187 for (j = 0; j < ncopies; ++j)
6188 {
6189 if (modifier == WIDEN && (j & 1))
6190 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6191 perm_mask, stmt, gsi);
6192 else if (j == 0)
6193 op = vec_oprnd0
6194 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6195 else
6196 op = vec_oprnd0
6197 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6198
6199 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6200 {
6201 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6202 == TYPE_VECTOR_SUBPARTS (idxtype));
6203 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6204 var = make_ssa_name (var);
6205 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6206 new_stmt
6207 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6209 op = var;
6210 }
6211
6212 new_stmt
6213 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6214
6215 if (!useless_type_conversion_p (vectype, rettype))
6216 {
6217 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6218 == TYPE_VECTOR_SUBPARTS (rettype));
6219 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6220 op = make_ssa_name (var, new_stmt);
6221 gimple_call_set_lhs (new_stmt, op);
6222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6223 var = make_ssa_name (vec_dest);
6224 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6225 new_stmt
6226 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6227 }
6228 else
6229 {
6230 var = make_ssa_name (vec_dest, new_stmt);
6231 gimple_call_set_lhs (new_stmt, var);
6232 }
6233
6234 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6235
6236 if (modifier == NARROW)
6237 {
6238 if ((j & 1) == 0)
6239 {
6240 prev_res = var;
6241 continue;
6242 }
6243 var = permute_vec_elements (prev_res, var,
6244 perm_mask, stmt, gsi);
6245 new_stmt = SSA_NAME_DEF_STMT (var);
6246 }
6247
6248 if (prev_stmt_info == NULL)
6249 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6250 else
6251 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6252 prev_stmt_info = vinfo_for_stmt (new_stmt);
6253 }
6254 return true;
6255 }
6256 else if (STMT_VINFO_STRIDED_P (stmt_info))
6257 {
6258 gimple_stmt_iterator incr_gsi;
6259 bool insert_after;
6260 gimple incr;
6261 tree offvar;
6262 tree ivstep;
6263 tree running_off;
6264 vec<constructor_elt, va_gc> *v = NULL;
6265 gimple_seq stmts = NULL;
6266 tree stride_base, stride_step, alias_off;
6267
6268 gcc_assert (!nested_in_vect_loop);
6269
6270 if (grouped_load)
6271 first_dr = STMT_VINFO_DATA_REF
6272 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6273 else
6274 first_dr = dr;
6275
6276 stride_base
6277 = fold_build_pointer_plus
6278 (DR_BASE_ADDRESS (first_dr),
6279 size_binop (PLUS_EXPR,
6280 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6281 convert_to_ptrofftype (DR_INIT (first_dr))));
6282 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6283
6284 /* For a load with loop-invariant (but other than power-of-2)
6285 stride (i.e. not a grouped access) like so:
6286
6287 for (i = 0; i < n; i += stride)
6288 ... = array[i];
6289
6290 we generate a new induction variable and new accesses to
6291 form a new vector (or vectors, depending on ncopies):
6292
6293 for (j = 0; ; j += VF*stride)
6294 tmp1 = array[j];
6295 tmp2 = array[j + stride];
6296 ...
6297 vectemp = {tmp1, tmp2, ...}
6298 */
6299
6300 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6301 build_int_cst (TREE_TYPE (stride_step), vf));
6302
6303 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6304
6305 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6306 loop, &incr_gsi, insert_after,
6307 &offvar, NULL);
6308 incr = gsi_stmt (incr_gsi);
6309 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6310
6311 stride_step = force_gimple_operand (unshare_expr (stride_step),
6312 &stmts, true, NULL_TREE);
6313 if (stmts)
6314 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6315
6316 prev_stmt_info = NULL;
6317 running_off = offvar;
6318 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6319 int nloads = nunits;
6320 tree ltype = TREE_TYPE (vectype);
6321 auto_vec<tree> dr_chain;
6322 if (slp)
6323 {
6324 nloads = nunits / group_size;
6325 if (group_size < nunits)
6326 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6327 else
6328 ltype = vectype;
6329 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6330 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6331 if (slp_perm)
6332 dr_chain.create (ncopies);
6333 }
6334 for (j = 0; j < ncopies; j++)
6335 {
6336 tree vec_inv;
6337
6338 if (nloads > 1)
6339 {
6340 vec_alloc (v, nloads);
6341 for (i = 0; i < nloads; i++)
6342 {
6343 tree newref, newoff;
6344 gimple incr;
6345 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6346
6347 newref = force_gimple_operand_gsi (gsi, newref, true,
6348 NULL_TREE, true,
6349 GSI_SAME_STMT);
6350 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6351 newoff = copy_ssa_name (running_off);
6352 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6353 running_off, stride_step);
6354 vect_finish_stmt_generation (stmt, incr, gsi);
6355
6356 running_off = newoff;
6357 }
6358
6359 vec_inv = build_constructor (vectype, v);
6360 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6361 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6362 }
6363 else
6364 {
6365 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6366 build2 (MEM_REF, ltype,
6367 running_off, alias_off));
6368 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6369
6370 tree newoff = copy_ssa_name (running_off);
6371 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6372 running_off, stride_step);
6373 vect_finish_stmt_generation (stmt, incr, gsi);
6374
6375 running_off = newoff;
6376 }
6377
6378 if (slp)
6379 {
6380 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6381 if (slp_perm)
6382 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6383 }
6384 if (j == 0)
6385 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6386 else
6387 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6388 prev_stmt_info = vinfo_for_stmt (new_stmt);
6389 }
6390 if (slp_perm)
6391 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6392 slp_node_instance, false);
6393 return true;
6394 }
6395
6396 if (grouped_load)
6397 {
6398 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6399 if (slp
6400 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6401 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6402 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6403
6404 /* Check if the chain of loads is already vectorized. */
6405 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6406 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6407 ??? But we can only do so if there is exactly one
6408 as we have no way to get at the rest. Leave the CSE
6409 opportunity alone.
6410 ??? With the group load eventually participating
6411 in multiple different permutations (having multiple
6412 slp nodes which refer to the same group) the CSE
6413 is even wrong code. See PR56270. */
6414 && !slp)
6415 {
6416 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6417 return true;
6418 }
6419 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6420 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6421 group_gap_adj = 0;
6422
6423 /* VEC_NUM is the number of vect stmts to be created for this group. */
6424 if (slp)
6425 {
6426 grouped_load = false;
6427 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6428 group_gap_adj = vf * group_size - nunits * vec_num;
6429 }
6430 else
6431 vec_num = group_size;
6432 }
6433 else
6434 {
6435 first_stmt = stmt;
6436 first_dr = dr;
6437 group_size = vec_num = 1;
6438 group_gap_adj = 0;
6439 }
6440
6441 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6442 gcc_assert (alignment_support_scheme);
6443 /* Targets with load-lane instructions must not require explicit
6444 realignment. */
6445 gcc_assert (!load_lanes_p
6446 || alignment_support_scheme == dr_aligned
6447 || alignment_support_scheme == dr_unaligned_supported);
6448
6449 /* In case the vectorization factor (VF) is bigger than the number
6450 of elements that we can fit in a vectype (nunits), we have to generate
6451 more than one vector stmt - i.e - we need to "unroll" the
6452 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6453 from one copy of the vector stmt to the next, in the field
6454 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6455 stages to find the correct vector defs to be used when vectorizing
6456 stmts that use the defs of the current stmt. The example below
6457 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6458 need to create 4 vectorized stmts):
6459
6460 before vectorization:
6461 RELATED_STMT VEC_STMT
6462 S1: x = memref - -
6463 S2: z = x + 1 - -
6464
6465 step 1: vectorize stmt S1:
6466 We first create the vector stmt VS1_0, and, as usual, record a
6467 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6468 Next, we create the vector stmt VS1_1, and record a pointer to
6469 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6470 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6471 stmts and pointers:
6472 RELATED_STMT VEC_STMT
6473 VS1_0: vx0 = memref0 VS1_1 -
6474 VS1_1: vx1 = memref1 VS1_2 -
6475 VS1_2: vx2 = memref2 VS1_3 -
6476 VS1_3: vx3 = memref3 - -
6477 S1: x = load - VS1_0
6478 S2: z = x + 1 - -
6479
6480 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6481 information we recorded in RELATED_STMT field is used to vectorize
6482 stmt S2. */
6483
6484 /* In case of interleaving (non-unit grouped access):
6485
6486 S1: x2 = &base + 2
6487 S2: x0 = &base
6488 S3: x1 = &base + 1
6489 S4: x3 = &base + 3
6490
6491 Vectorized loads are created in the order of memory accesses
6492 starting from the access of the first stmt of the chain:
6493
6494 VS1: vx0 = &base
6495 VS2: vx1 = &base + vec_size*1
6496 VS3: vx3 = &base + vec_size*2
6497 VS4: vx4 = &base + vec_size*3
6498
6499 Then permutation statements are generated:
6500
6501 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6502 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6503 ...
6504
6505 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6506 (the order of the data-refs in the output of vect_permute_load_chain
6507 corresponds to the order of scalar stmts in the interleaving chain - see
6508 the documentation of vect_permute_load_chain()).
6509 The generation of permutation stmts and recording them in
6510 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6511
6512 In case of both multiple types and interleaving, the vector loads and
6513 permutation stmts above are created for every copy. The result vector
6514 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6515 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6516
6517 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6518 on a target that supports unaligned accesses (dr_unaligned_supported)
6519 we generate the following code:
6520 p = initial_addr;
6521 indx = 0;
6522 loop {
6523 p = p + indx * vectype_size;
6524 vec_dest = *(p);
6525 indx = indx + 1;
6526 }
6527
6528 Otherwise, the data reference is potentially unaligned on a target that
6529 does not support unaligned accesses (dr_explicit_realign_optimized) -
6530 then generate the following code, in which the data in each iteration is
6531 obtained by two vector loads, one from the previous iteration, and one
6532 from the current iteration:
6533 p1 = initial_addr;
6534 msq_init = *(floor(p1))
6535 p2 = initial_addr + VS - 1;
6536 realignment_token = call target_builtin;
6537 indx = 0;
6538 loop {
6539 p2 = p2 + indx * vectype_size
6540 lsq = *(floor(p2))
6541 vec_dest = realign_load (msq, lsq, realignment_token)
6542 indx = indx + 1;
6543 msq = lsq;
6544 } */
6545
6546 /* If the misalignment remains the same throughout the execution of the
6547 loop, we can create the init_addr and permutation mask at the loop
6548 preheader. Otherwise, it needs to be created inside the loop.
6549 This can only occur when vectorizing memory accesses in the inner-loop
6550 nested within an outer-loop that is being vectorized. */
6551
6552 if (nested_in_vect_loop
6553 && (TREE_INT_CST_LOW (DR_STEP (dr))
6554 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6555 {
6556 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6557 compute_in_loop = true;
6558 }
6559
6560 if ((alignment_support_scheme == dr_explicit_realign_optimized
6561 || alignment_support_scheme == dr_explicit_realign)
6562 && !compute_in_loop)
6563 {
6564 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6565 alignment_support_scheme, NULL_TREE,
6566 &at_loop);
6567 if (alignment_support_scheme == dr_explicit_realign_optimized)
6568 {
6569 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6570 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6571 size_one_node);
6572 }
6573 }
6574 else
6575 at_loop = loop;
6576
6577 if (negative)
6578 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6579
6580 if (load_lanes_p)
6581 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6582 else
6583 aggr_type = vectype;
6584
6585 prev_stmt_info = NULL;
6586 for (j = 0; j < ncopies; j++)
6587 {
6588 /* 1. Create the vector or array pointer update chain. */
6589 if (j == 0)
6590 {
6591 bool simd_lane_access_p
6592 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6593 if (simd_lane_access_p
6594 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6595 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6596 && integer_zerop (DR_OFFSET (first_dr))
6597 && integer_zerop (DR_INIT (first_dr))
6598 && alias_sets_conflict_p (get_alias_set (aggr_type),
6599 get_alias_set (DR_REF (first_dr)))
6600 && (alignment_support_scheme == dr_aligned
6601 || alignment_support_scheme == dr_unaligned_supported))
6602 {
6603 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6604 dataref_offset = build_int_cst (reference_alias_ptr_type
6605 (DR_REF (first_dr)), 0);
6606 inv_p = false;
6607 }
6608 else
6609 dataref_ptr
6610 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6611 offset, &dummy, gsi, &ptr_incr,
6612 simd_lane_access_p, &inv_p,
6613 byte_offset);
6614 }
6615 else if (dataref_offset)
6616 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6617 TYPE_SIZE_UNIT (aggr_type));
6618 else
6619 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6620 TYPE_SIZE_UNIT (aggr_type));
6621
6622 if (grouped_load || slp_perm)
6623 dr_chain.create (vec_num);
6624
6625 if (load_lanes_p)
6626 {
6627 tree vec_array;
6628
6629 vec_array = create_vector_array (vectype, vec_num);
6630
6631 /* Emit:
6632 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6633 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6634 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6635 gimple_call_set_lhs (new_stmt, vec_array);
6636 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6637
6638 /* Extract each vector into an SSA_NAME. */
6639 for (i = 0; i < vec_num; i++)
6640 {
6641 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6642 vec_array, i);
6643 dr_chain.quick_push (new_temp);
6644 }
6645
6646 /* Record the mapping between SSA_NAMEs and statements. */
6647 vect_record_grouped_load_vectors (stmt, dr_chain);
6648 }
6649 else
6650 {
6651 for (i = 0; i < vec_num; i++)
6652 {
6653 if (i > 0)
6654 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6655 stmt, NULL_TREE);
6656
6657 /* 2. Create the vector-load in the loop. */
6658 switch (alignment_support_scheme)
6659 {
6660 case dr_aligned:
6661 case dr_unaligned_supported:
6662 {
6663 unsigned int align, misalign;
6664
6665 data_ref
6666 = fold_build2 (MEM_REF, vectype, dataref_ptr,
6667 dataref_offset
6668 ? dataref_offset
6669 : build_int_cst (reference_alias_ptr_type
6670 (DR_REF (first_dr)), 0));
6671 align = TYPE_ALIGN_UNIT (vectype);
6672 if (alignment_support_scheme == dr_aligned)
6673 {
6674 gcc_assert (aligned_access_p (first_dr));
6675 misalign = 0;
6676 }
6677 else if (DR_MISALIGNMENT (first_dr) == -1)
6678 {
6679 TREE_TYPE (data_ref)
6680 = build_aligned_type (TREE_TYPE (data_ref),
6681 TYPE_ALIGN (elem_type));
6682 align = TYPE_ALIGN_UNIT (elem_type);
6683 misalign = 0;
6684 }
6685 else
6686 {
6687 TREE_TYPE (data_ref)
6688 = build_aligned_type (TREE_TYPE (data_ref),
6689 TYPE_ALIGN (elem_type));
6690 misalign = DR_MISALIGNMENT (first_dr);
6691 }
6692 if (dataref_offset == NULL_TREE
6693 && TREE_CODE (dataref_ptr) == SSA_NAME)
6694 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6695 align, misalign);
6696 break;
6697 }
6698 case dr_explicit_realign:
6699 {
6700 tree ptr, bump;
6701
6702 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6703
6704 if (compute_in_loop)
6705 msq = vect_setup_realignment (first_stmt, gsi,
6706 &realignment_token,
6707 dr_explicit_realign,
6708 dataref_ptr, NULL);
6709
6710 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6711 ptr = copy_ssa_name (dataref_ptr);
6712 else
6713 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
6714 new_stmt = gimple_build_assign
6715 (ptr, BIT_AND_EXPR, dataref_ptr,
6716 build_int_cst
6717 (TREE_TYPE (dataref_ptr),
6718 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6719 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6720 data_ref
6721 = build2 (MEM_REF, vectype, ptr,
6722 build_int_cst (reference_alias_ptr_type
6723 (DR_REF (first_dr)), 0));
6724 vec_dest = vect_create_destination_var (scalar_dest,
6725 vectype);
6726 new_stmt = gimple_build_assign (vec_dest, data_ref);
6727 new_temp = make_ssa_name (vec_dest, new_stmt);
6728 gimple_assign_set_lhs (new_stmt, new_temp);
6729 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6730 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6732 msq = new_temp;
6733
6734 bump = size_binop (MULT_EXPR, vs,
6735 TYPE_SIZE_UNIT (elem_type));
6736 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6737 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6738 new_stmt = gimple_build_assign
6739 (NULL_TREE, BIT_AND_EXPR, ptr,
6740 build_int_cst
6741 (TREE_TYPE (ptr),
6742 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6743 ptr = copy_ssa_name (ptr, new_stmt);
6744 gimple_assign_set_lhs (new_stmt, ptr);
6745 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6746 data_ref
6747 = build2 (MEM_REF, vectype, ptr,
6748 build_int_cst (reference_alias_ptr_type
6749 (DR_REF (first_dr)), 0));
6750 break;
6751 }
6752 case dr_explicit_realign_optimized:
6753 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6754 new_temp = copy_ssa_name (dataref_ptr);
6755 else
6756 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
6757 new_stmt = gimple_build_assign
6758 (new_temp, BIT_AND_EXPR, dataref_ptr,
6759 build_int_cst
6760 (TREE_TYPE (dataref_ptr),
6761 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6762 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6763 data_ref
6764 = build2 (MEM_REF, vectype, new_temp,
6765 build_int_cst (reference_alias_ptr_type
6766 (DR_REF (first_dr)), 0));
6767 break;
6768 default:
6769 gcc_unreachable ();
6770 }
6771 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6772 new_stmt = gimple_build_assign (vec_dest, data_ref);
6773 new_temp = make_ssa_name (vec_dest, new_stmt);
6774 gimple_assign_set_lhs (new_stmt, new_temp);
6775 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6776
6777 /* 3. Handle explicit realignment if necessary/supported.
6778 Create in loop:
6779 vec_dest = realign_load (msq, lsq, realignment_token) */
6780 if (alignment_support_scheme == dr_explicit_realign_optimized
6781 || alignment_support_scheme == dr_explicit_realign)
6782 {
6783 lsq = gimple_assign_lhs (new_stmt);
6784 if (!realignment_token)
6785 realignment_token = dataref_ptr;
6786 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6787 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6788 msq, lsq, realignment_token);
6789 new_temp = make_ssa_name (vec_dest, new_stmt);
6790 gimple_assign_set_lhs (new_stmt, new_temp);
6791 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6792
6793 if (alignment_support_scheme == dr_explicit_realign_optimized)
6794 {
6795 gcc_assert (phi);
6796 if (i == vec_num - 1 && j == ncopies - 1)
6797 add_phi_arg (phi, lsq,
6798 loop_latch_edge (containing_loop),
6799 UNKNOWN_LOCATION);
6800 msq = lsq;
6801 }
6802 }
6803
6804 /* 4. Handle invariant-load. */
6805 if (inv_p && !bb_vinfo)
6806 {
6807 gcc_assert (!grouped_load);
6808 /* If we have versioned for aliasing or the loop doesn't
6809 have any data dependencies that would preclude this,
6810 then we are sure this is a loop invariant load and
6811 thus we can insert it on the preheader edge. */
6812 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6813 && !nested_in_vect_loop
6814 && hoist_defs_of_uses (stmt, loop))
6815 {
6816 if (dump_enabled_p ())
6817 {
6818 dump_printf_loc (MSG_NOTE, vect_location,
6819 "hoisting out of the vectorized "
6820 "loop: ");
6821 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6822 }
6823 tree tem = copy_ssa_name (scalar_dest);
6824 gsi_insert_on_edge_immediate
6825 (loop_preheader_edge (loop),
6826 gimple_build_assign (tem,
6827 unshare_expr
6828 (gimple_assign_rhs1 (stmt))));
6829 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6830 }
6831 else
6832 {
6833 gimple_stmt_iterator gsi2 = *gsi;
6834 gsi_next (&gsi2);
6835 new_temp = vect_init_vector (stmt, scalar_dest,
6836 vectype, &gsi2);
6837 }
6838 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6839 set_vinfo_for_stmt (new_stmt,
6840 new_stmt_vec_info (new_stmt, loop_vinfo,
6841 bb_vinfo));
6842 }
6843
6844 if (negative)
6845 {
6846 tree perm_mask = perm_mask_for_reverse (vectype);
6847 new_temp = permute_vec_elements (new_temp, new_temp,
6848 perm_mask, stmt, gsi);
6849 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6850 }
6851
6852 /* Collect vector loads and later create their permutation in
6853 vect_transform_grouped_load (). */
6854 if (grouped_load || slp_perm)
6855 dr_chain.quick_push (new_temp);
6856
6857 /* Store vector loads in the corresponding SLP_NODE. */
6858 if (slp && !slp_perm)
6859 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6860 }
6861 /* Bump the vector pointer to account for a gap or for excess
6862 elements loaded for a permuted SLP load. */
6863 if (group_gap_adj != 0)
6864 {
6865 bool ovf;
6866 tree bump
6867 = wide_int_to_tree (sizetype,
6868 wi::smul (TYPE_SIZE_UNIT (elem_type),
6869 group_gap_adj, &ovf));
6870 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6871 stmt, bump);
6872 }
6873 }
6874
6875 if (slp && !slp_perm)
6876 continue;
6877
6878 if (slp_perm)
6879 {
6880 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6881 slp_node_instance, false))
6882 {
6883 dr_chain.release ();
6884 return false;
6885 }
6886 }
6887 else
6888 {
6889 if (grouped_load)
6890 {
6891 if (!load_lanes_p)
6892 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6893 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6894 }
6895 else
6896 {
6897 if (j == 0)
6898 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6899 else
6900 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6901 prev_stmt_info = vinfo_for_stmt (new_stmt);
6902 }
6903 }
6904 dr_chain.release ();
6905 }
6906
6907 return true;
6908 }
6909
6910 /* Function vect_is_simple_cond.
6911
6912 Input:
6913 LOOP - the loop that is being vectorized.
6914 COND - Condition that is checked for simple use.
6915
6916 Output:
6917 *COMP_VECTYPE - the vector type for the comparison.
6918
6919 Returns whether a COND can be vectorized. Checks whether
6920 condition operands are supportable using vect_is_simple_use_1. */
6921
6922 static bool
6923 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6924 bb_vec_info bb_vinfo, tree *comp_vectype)
6925 {
6926 tree lhs, rhs;
6927 tree def;
6928 enum vect_def_type dt;
6929 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6930
6931 if (!COMPARISON_CLASS_P (cond))
6932 return false;
6933
6934 lhs = TREE_OPERAND (cond, 0);
6935 rhs = TREE_OPERAND (cond, 1);
6936
6937 if (TREE_CODE (lhs) == SSA_NAME)
6938 {
6939 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6940 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6941 &lhs_def_stmt, &def, &dt, &vectype1))
6942 return false;
6943 }
6944 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6945 && TREE_CODE (lhs) != FIXED_CST)
6946 return false;
6947
6948 if (TREE_CODE (rhs) == SSA_NAME)
6949 {
6950 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6951 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6952 &rhs_def_stmt, &def, &dt, &vectype2))
6953 return false;
6954 }
6955 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6956 && TREE_CODE (rhs) != FIXED_CST)
6957 return false;
6958
6959 *comp_vectype = vectype1 ? vectype1 : vectype2;
6960 return true;
6961 }
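
/* Editorial example (with hypothetical SSA names): for a scalar statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   the COND checked above is "a_1 < b_2"; both operands are SSA names, so
   vect_is_simple_use_1 provides their vector types and *COMP_VECTYPE is
   taken from whichever operand supplies one.  */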
6962
6963 /* vectorizable_condition.
6964
6965 Check if STMT is a conditional modify expression that can be vectorized.
6966 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6967 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6968 at GSI.
6969
6970 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6971 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6972 the else clause if it is 2).
6973
6974 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6975
6976 bool
6977 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6978 gimple *vec_stmt, tree reduc_def, int reduc_index,
6979 slp_tree slp_node)
6980 {
6981 tree scalar_dest = NULL_TREE;
6982 tree vec_dest = NULL_TREE;
6983 tree cond_expr, then_clause, else_clause;
6984 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6985 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6986 tree comp_vectype = NULL_TREE;
6987 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6988 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6989 tree vec_compare, vec_cond_expr;
6990 tree new_temp;
6991 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6992 tree def;
6993 enum vect_def_type dt, dts[4];
6994 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6995 int ncopies;
6996 enum tree_code code;
6997 stmt_vec_info prev_stmt_info = NULL;
6998 int i, j;
6999 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7000 vec<tree> vec_oprnds0 = vNULL;
7001 vec<tree> vec_oprnds1 = vNULL;
7002 vec<tree> vec_oprnds2 = vNULL;
7003 vec<tree> vec_oprnds3 = vNULL;
7004 tree vec_cmp_type;
7005
7006 if (slp_node || PURE_SLP_STMT (stmt_info))
7007 ncopies = 1;
7008 else
7009 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7010
7011 gcc_assert (ncopies >= 1);
7012 if (reduc_index && ncopies > 1)
7013 return false; /* FORNOW */
7014
7015 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7016 return false;
7017
7018 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7019 return false;
7020
7021 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7022 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7023 && reduc_def))
7024 return false;
7025
7026 /* FORNOW: not yet supported. */
7027 if (STMT_VINFO_LIVE_P (stmt_info))
7028 {
7029 if (dump_enabled_p ())
7030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7031 "value used after loop.\n");
7032 return false;
7033 }
7034
7035 /* Is vectorizable conditional operation? */
7036 if (!is_gimple_assign (stmt))
7037 return false;
7038
7039 code = gimple_assign_rhs_code (stmt);
7040
7041 if (code != COND_EXPR)
7042 return false;
7043
7044 cond_expr = gimple_assign_rhs1 (stmt);
7045 then_clause = gimple_assign_rhs2 (stmt);
7046 else_clause = gimple_assign_rhs3 (stmt);
7047
7048 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7049 &comp_vectype)
7050 || !comp_vectype)
7051 return false;
7052
7053 if (TREE_CODE (then_clause) == SSA_NAME)
7054 {
7055 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7056 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7057 &then_def_stmt, &def, &dt))
7058 return false;
7059 }
7060 else if (TREE_CODE (then_clause) != INTEGER_CST
7061 && TREE_CODE (then_clause) != REAL_CST
7062 && TREE_CODE (then_clause) != FIXED_CST)
7063 return false;
7064
7065 if (TREE_CODE (else_clause) == SSA_NAME)
7066 {
7067 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7068 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7069 &else_def_stmt, &def, &dt))
7070 return false;
7071 }
7072 else if (TREE_CODE (else_clause) != INTEGER_CST
7073 && TREE_CODE (else_clause) != REAL_CST
7074 && TREE_CODE (else_clause) != FIXED_CST)
7075 return false;
7076
7077 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7078 /* The result of a vector comparison should be of signed integer type. */
7079 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7080 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7081 if (vec_cmp_type == NULL_TREE)
7082 return false;
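/* Editorial example: for V4SF operands PREC is 32, CMP_TYPE is a 32-bit
   signed integer type and VEC_CMP_TYPE the corresponding V4SI vector type
   (assuming the target provides one); the comparison built below therefore
   has integer elements of the same width as the data elements, as expected
   by VEC_COND_EXPR.  */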
7083
7084 if (!vec_stmt)
7085 {
7086 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7087 return expand_vec_cond_expr_p (vectype, comp_vectype);
7088 }
7089
7090 /* Transform. */
7091
7092 if (!slp_node)
7093 {
7094 vec_oprnds0.create (1);
7095 vec_oprnds1.create (1);
7096 vec_oprnds2.create (1);
7097 vec_oprnds3.create (1);
7098 }
7099
7100 /* Handle def. */
7101 scalar_dest = gimple_assign_lhs (stmt);
7102 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7103
7104 /* Handle cond expr. */
7105 for (j = 0; j < ncopies; j++)
7106 {
7107 gassign *new_stmt = NULL;
7108 if (j == 0)
7109 {
7110 if (slp_node)
7111 {
7112 auto_vec<tree, 4> ops;
7113 auto_vec<vec<tree>, 4> vec_defs;
7114
7115 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7116 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7117 ops.safe_push (then_clause);
7118 ops.safe_push (else_clause);
7119 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7120 vec_oprnds3 = vec_defs.pop ();
7121 vec_oprnds2 = vec_defs.pop ();
7122 vec_oprnds1 = vec_defs.pop ();
7123 vec_oprnds0 = vec_defs.pop ();
7124
7125 ops.release ();
7126 vec_defs.release ();
7127 }
7128 else
7129 {
7130 gimple gtemp;
7131 vec_cond_lhs =
7132 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7133 stmt, NULL);
7134 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7135 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7136
7137 vec_cond_rhs =
7138 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7139 stmt, NULL);
7140 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7141 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7142 if (reduc_index == 1)
7143 vec_then_clause = reduc_def;
7144 else
7145 {
7146 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7147 stmt, NULL);
7148 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7149 NULL, &gtemp, &def, &dts[2]);
7150 }
7151 if (reduc_index == 2)
7152 vec_else_clause = reduc_def;
7153 else
7154 {
7155 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7156 stmt, NULL);
7157 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7158 NULL, &gtemp, &def, &dts[3]);
7159 }
7160 }
7161 }
7162 else
7163 {
7164 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7165 vec_oprnds0.pop ());
7166 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7167 vec_oprnds1.pop ());
7168 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7169 vec_oprnds2.pop ());
7170 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7171 vec_oprnds3.pop ());
7172 }
7173
7174 if (!slp_node)
7175 {
7176 vec_oprnds0.quick_push (vec_cond_lhs);
7177 vec_oprnds1.quick_push (vec_cond_rhs);
7178 vec_oprnds2.quick_push (vec_then_clause);
7179 vec_oprnds3.quick_push (vec_else_clause);
7180 }
7181
7182 /* Arguments are ready. Create the new vector stmt. */
7183 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7184 {
7185 vec_cond_rhs = vec_oprnds1[i];
7186 vec_then_clause = vec_oprnds2[i];
7187 vec_else_clause = vec_oprnds3[i];
7188
7189 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7190 vec_cond_lhs, vec_cond_rhs);
7191 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7192 vec_compare, vec_then_clause, vec_else_clause);
7193
7194 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7195 new_temp = make_ssa_name (vec_dest, new_stmt);
7196 gimple_assign_set_lhs (new_stmt, new_temp);
7197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7198 if (slp_node)
7199 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7200 }
7201
7202 if (slp_node)
7203 continue;
7204
7205 if (j == 0)
7206 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7207 else
7208 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7209
7210 prev_stmt_info = vinfo_for_stmt (new_stmt);
7211 }
7212
7213 vec_oprnds0.release ();
7214 vec_oprnds1.release ();
7215 vec_oprnds2.release ();
7216 vec_oprnds3.release ();
7217
7218 return true;
7219 }
7220
7221
7222 /* Make sure the statement is vectorizable. */
7223
7224 bool
7225 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7226 {
7227 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7229 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7230 bool ok;
7231 tree scalar_type, vectype;
7232 gimple pattern_stmt;
7233 gimple_seq pattern_def_seq;
7234
7235 if (dump_enabled_p ())
7236 {
7237 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7238 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7239 }
7240
7241 if (gimple_has_volatile_ops (stmt))
7242 {
7243 if (dump_enabled_p ())
7244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7245 "not vectorized: stmt has volatile operands\n");
7246
7247 return false;
7248 }
7249
7250 /* Skip stmts that do not need to be vectorized. In loops this is expected
7251 to include:
7252 - the COND_EXPR which is the loop exit condition
7253 - any LABEL_EXPRs in the loop
7254 - computations that are used only for array indexing or loop control.
7255 In basic blocks we only analyze statements that are a part of some SLP
7256 instance, therefore, all the statements are relevant.
7257
7258 Pattern statement needs to be analyzed instead of the original statement
7259 if the original statement is not relevant. Otherwise, we analyze both
7260 statements. In basic blocks we are called from some SLP instance
7261 traversal; there we do not analyze pattern stmts in place of the original
7262 ones, because the pattern stmts are already part of the SLP instance. */
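  /* For example (loop case, illustrative only): in
       for (i = 0; i < n; i++)
         a[i] = b[i] + 1;
     the induction increment of i and the exit test i < n are not
     vectorized themselves; only the load of b[i], the addition and the
     store to a[i] are relevant here.  */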
7263
7264 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7265 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7266 && !STMT_VINFO_LIVE_P (stmt_info))
7267 {
7268 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7269 && pattern_stmt
7270 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7271 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7272 {
7273 /* Analyze PATTERN_STMT instead of the original stmt. */
7274 stmt = pattern_stmt;
7275 stmt_info = vinfo_for_stmt (pattern_stmt);
7276 if (dump_enabled_p ())
7277 {
7278 dump_printf_loc (MSG_NOTE, vect_location,
7279 "==> examining pattern statement: ");
7280 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7281 }
7282 }
7283 else
7284 {
7285 if (dump_enabled_p ())
7286 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7287
7288 return true;
7289 }
7290 }
7291 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7292 && node == NULL
7293 && pattern_stmt
7294 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7295 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7296 {
7297 /* Analyze PATTERN_STMT too. */
7298 if (dump_enabled_p ())
7299 {
7300 dump_printf_loc (MSG_NOTE, vect_location,
7301 "==> examining pattern statement: ");
7302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7303 }
7304
7305 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7306 return false;
7307 }
7308
7309 if (is_pattern_stmt_p (stmt_info)
7310 && node == NULL
7311 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7312 {
7313 gimple_stmt_iterator si;
7314
7315 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7316 {
7317 gimple pattern_def_stmt = gsi_stmt (si);
7318 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7319 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7320 {
7321 /* Analyze def stmt of STMT if it's a pattern stmt. */
7322 if (dump_enabled_p ())
7323 {
7324 dump_printf_loc (MSG_NOTE, vect_location,
7325 "==> examining pattern def statement: ");
7326 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7327 }
7328
7329 if (!vect_analyze_stmt (pattern_def_stmt,
7330 need_to_vectorize, node))
7331 return false;
7332 }
7333 }
7334 }
7335
7336 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7337 {
7338 case vect_internal_def:
7339 break;
7340
7341 case vect_reduction_def:
7342 case vect_nested_cycle:
7343 gcc_assert (!bb_vinfo
7344 && (relevance == vect_used_in_outer
7345 || relevance == vect_used_in_outer_by_reduction
7346 || relevance == vect_used_by_reduction
7347 || relevance == vect_unused_in_scope));
7348 break;
7349
7350 case vect_induction_def:
7351 case vect_constant_def:
7352 case vect_external_def:
7353 case vect_unknown_def_type:
7354 default:
7355 gcc_unreachable ();
7356 }
7357
7358 if (bb_vinfo)
7359 {
7360 gcc_assert (PURE_SLP_STMT (stmt_info));
7361
7362 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7363 if (dump_enabled_p ())
7364 {
7365 dump_printf_loc (MSG_NOTE, vect_location,
7366 "get vectype for scalar type: ");
7367 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7368 dump_printf (MSG_NOTE, "\n");
7369 }
7370
7371 vectype = get_vectype_for_scalar_type (scalar_type);
7372 if (!vectype)
7373 {
7374 if (dump_enabled_p ())
7375 {
7376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7377 "not SLPed: unsupported data-type ");
7378 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7379 scalar_type);
7380 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7381 }
7382 return false;
7383 }
7384
7385 if (dump_enabled_p ())
7386 {
7387 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7388 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7389 dump_printf (MSG_NOTE, "\n");
7390 }
7391
7392 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7393 }
7394
7395 if (STMT_VINFO_RELEVANT_P (stmt_info))
7396 {
7397 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7398 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7399 || (is_gimple_call (stmt)
7400 && gimple_call_lhs (stmt) == NULL_TREE));
7401 *need_to_vectorize = true;
7402 }
7403
7404 if (PURE_SLP_STMT (stmt_info) && !node)
7405 {
7406 dump_printf_loc (MSG_NOTE, vect_location,
7407 "handled only by SLP analysis\n");
7408 return true;
7409 }
7410
7411 ok = true;
7412 if (!bb_vinfo
7413 && (STMT_VINFO_RELEVANT_P (stmt_info)
7414 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7415 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7416 || vectorizable_conversion (stmt, NULL, NULL, node)
7417 || vectorizable_shift (stmt, NULL, NULL, node)
7418 || vectorizable_operation (stmt, NULL, NULL, node)
7419 || vectorizable_assignment (stmt, NULL, NULL, node)
7420 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7421 || vectorizable_call (stmt, NULL, NULL, node)
7422 || vectorizable_store (stmt, NULL, NULL, node)
7423 || vectorizable_reduction (stmt, NULL, NULL, node)
7424 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7425 else
7426 {
7427 if (bb_vinfo)
7428 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7429 || vectorizable_conversion (stmt, NULL, NULL, node)
7430 || vectorizable_shift (stmt, NULL, NULL, node)
7431 || vectorizable_operation (stmt, NULL, NULL, node)
7432 || vectorizable_assignment (stmt, NULL, NULL, node)
7433 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7434 || vectorizable_call (stmt, NULL, NULL, node)
7435 || vectorizable_store (stmt, NULL, NULL, node)
7436 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7437 }
7438
7439 if (!ok)
7440 {
7441 if (dump_enabled_p ())
7442 {
7443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7444 "not vectorized: relevant stmt not ");
7445 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7446 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7447 }
7448
7449 return false;
7450 }
7451
7452 if (bb_vinfo)
7453 return true;
7454
7455 /* Stmts that are (also) "live" (i.e., used outside of the loop)
7456 need extra handling, except for vectorizable reductions. */
7457 if (STMT_VINFO_LIVE_P (stmt_info)
7458 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7459 ok = vectorizable_live_operation (stmt, NULL, NULL);
7460
7461 if (!ok)
7462 {
7463 if (dump_enabled_p ())
7464 {
7465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7466 "not vectorized: live stmt not ");
7467 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7468 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7469 }
7470
7471 return false;
7472 }
7473
7474 return true;
7475 }
7476
7477
7478 /* Function vect_transform_stmt.
7479
7480 Create a vectorized stmt to replace STMT, and insert it at BSI. */
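/* Returns TRUE when the statement is a store that has been vectorized,
   so that callers know when the scalar stores of an interleaving group
   can be removed; see the store_vec_info_type and IFN_MASK_STORE
   handling below.  */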
7481
7482 bool
7483 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7484 bool *grouped_store, slp_tree slp_node,
7485 slp_instance slp_node_instance)
7486 {
7487 bool is_store = false;
7488 gimple vec_stmt = NULL;
7489 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7490 bool done;
7491
7492 switch (STMT_VINFO_TYPE (stmt_info))
7493 {
7494 case type_demotion_vec_info_type:
7495 case type_promotion_vec_info_type:
7496 case type_conversion_vec_info_type:
7497 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7498 gcc_assert (done);
7499 break;
7500
7501 case induc_vec_info_type:
7502 gcc_assert (!slp_node);
7503 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7504 gcc_assert (done);
7505 break;
7506
7507 case shift_vec_info_type:
7508 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7509 gcc_assert (done);
7510 break;
7511
7512 case op_vec_info_type:
7513 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7514 gcc_assert (done);
7515 break;
7516
7517 case assignment_vec_info_type:
7518 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7519 gcc_assert (done);
7520 break;
7521
7522 case load_vec_info_type:
7523 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7524 slp_node_instance);
7525 gcc_assert (done);
7526 break;
7527
7528 case store_vec_info_type:
7529 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7530 gcc_assert (done);
7531 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7532 {
7533 /* In case of interleaving, the whole chain is vectorized when the
7534 last store in the chain is reached. Store stmts before the last
7535 one are skipped, and their vec_stmt_info shouldn't be freed
7536 meanwhile. */
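	  /* E.g. for a group of scalar stores to a[4*i], a[4*i+1],
	     a[4*i+2] and a[4*i+3], only reaching the last store of the
	     group triggers generation of the interleaved vector stores
	     for the whole chain (illustrative only).  */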
7537 *grouped_store = true;
7538 if (STMT_VINFO_VEC_STMT (stmt_info))
7539 is_store = true;
7540 }
7541 else
7542 is_store = true;
7543 break;
7544
7545 case condition_vec_info_type:
7546 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7547 gcc_assert (done);
7548 break;
7549
7550 case call_vec_info_type:
7551 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7552 stmt = gsi_stmt (*gsi);
7553 if (is_gimple_call (stmt)
7554 && gimple_call_internal_p (stmt)
7555 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7556 is_store = true;
7557 break;
7558
7559 case call_simd_clone_vec_info_type:
7560 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7561 stmt = gsi_stmt (*gsi);
7562 break;
7563
7564 case reduc_vec_info_type:
7565 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7566 gcc_assert (done);
7567 break;
7568
7569 default:
7570 if (!STMT_VINFO_LIVE_P (stmt_info))
7571 {
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7574 "stmt not supported.\n");
7575 gcc_unreachable ();
7576 }
7577 }
7578
7579 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7580 is being vectorized, but outside the immediately enclosing loop. */
7581 if (vec_stmt
7582 && STMT_VINFO_LOOP_VINFO (stmt_info)
7583 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7584 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7585 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7586 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7587 || STMT_VINFO_RELEVANT (stmt_info) ==
7588 vect_used_in_outer_by_reduction))
7589 {
7590 struct loop *innerloop = LOOP_VINFO_LOOP (
7591 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7592 imm_use_iterator imm_iter;
7593 use_operand_p use_p;
7594 tree scalar_dest;
7595 gimple exit_phi;
7596
7597 if (dump_enabled_p ())
7598 dump_printf_loc (MSG_NOTE, vect_location,
7599 "Record the vdef for outer-loop vectorization.\n");
7600
7601 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7602 (to be used when vectorizing outer-loop stmts that use the DEF of
7603 STMT). */
7604 if (gimple_code (stmt) == GIMPLE_PHI)
7605 scalar_dest = PHI_RESULT (stmt);
7606 else
7607 scalar_dest = gimple_assign_lhs (stmt);
7608
7609 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7610 {
7611 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7612 {
7613 exit_phi = USE_STMT (use_p);
7614 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7615 }
7616 }
7617 }
7618
7619 /* Handle stmts whose DEF is used outside the loop-nest that is
7620 being vectorized. */
7621 if (STMT_VINFO_LIVE_P (stmt_info)
7622 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7623 {
7624 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7625 gcc_assert (done);
7626 }
7627
7628 if (vec_stmt)
7629 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7630
7631 return is_store;
7632 }
7633
7634
7635 /* Remove a group of stores (for SLP or interleaving), free their
7636 stmt_vec_info. */
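/* The group is walked via GROUP_NEXT_ELEMENT starting from FIRST_STMT;
   for pattern statements the related original scalar statement, which
   is the one present in the IL, is the one removed.  */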
7637
7638 void
7639 vect_remove_stores (gimple first_stmt)
7640 {
7641 gimple next = first_stmt;
7642 gimple tmp;
7643 gimple_stmt_iterator next_si;
7644
7645 while (next)
7646 {
7647 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7648
7649 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7650 if (is_pattern_stmt_p (stmt_info))
7651 next = STMT_VINFO_RELATED_STMT (stmt_info);
7652 /* Free the attached stmt_vec_info and remove the stmt. */
7653 next_si = gsi_for_stmt (next);
7654 unlink_stmt_vdef (next);
7655 gsi_remove (&next_si, true);
7656 release_defs (next);
7657 free_stmt_vec_info (next);
7658 next = tmp;
7659 }
7660 }
7661
7662
7663 /* Function new_stmt_vec_info.
7664
7665 Create and initialize a new stmt_vec_info struct for STMT. */
7666
7667 stmt_vec_info
7668 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7669 bb_vec_info bb_vinfo)
7670 {
7671 stmt_vec_info res;
7672 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7673
7674 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7675 STMT_VINFO_STMT (res) = stmt;
7676 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7677 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7678 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7679 STMT_VINFO_LIVE_P (res) = false;
7680 STMT_VINFO_VECTYPE (res) = NULL;
7681 STMT_VINFO_VEC_STMT (res) = NULL;
7682 STMT_VINFO_VECTORIZABLE (res) = true;
7683 STMT_VINFO_IN_PATTERN_P (res) = false;
7684 STMT_VINFO_RELATED_STMT (res) = NULL;
7685 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7686 STMT_VINFO_DATA_REF (res) = NULL;
7687
7688 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7689 STMT_VINFO_DR_OFFSET (res) = NULL;
7690 STMT_VINFO_DR_INIT (res) = NULL;
7691 STMT_VINFO_DR_STEP (res) = NULL;
7692 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7693
7694 if (gimple_code (stmt) == GIMPLE_PHI
7695 && is_loop_header_bb_p (gimple_bb (stmt)))
7696 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7697 else
7698 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7699
7700 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7701 STMT_SLP_TYPE (res) = loop_vect;
7702 GROUP_FIRST_ELEMENT (res) = NULL;
7703 GROUP_NEXT_ELEMENT (res) = NULL;
7704 GROUP_SIZE (res) = 0;
7705 GROUP_STORE_COUNT (res) = 0;
7706 GROUP_GAP (res) = 0;
7707 GROUP_SAME_DR_STMT (res) = NULL;
7708
7709 return res;
7710 }
7711
7712
7713 /* Create a hash table for stmt_vec_info. */
7714
7715 void
7716 init_stmt_vec_info_vec (void)
7717 {
7718 gcc_assert (!stmt_vec_info_vec.exists ());
7719 stmt_vec_info_vec.create (50);
7720 }
7721
7722
7723 /* Free hash table for stmt_vec_info. */
7724
7725 void
7726 free_stmt_vec_info_vec (void)
7727 {
7728 unsigned int i;
7729 vec_void_p info;
7730 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7731 if (info != NULL)
7732 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7733 gcc_assert (stmt_vec_info_vec.exists ());
7734 stmt_vec_info_vec.release ();
7735 }
7736
7737
7738 /* Free stmt vectorization related info. */
7739
7740 void
7741 free_stmt_vec_info (gimple stmt)
7742 {
7743 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7744
7745 if (!stmt_info)
7746 return;
7747
7748 /* Check if this statement has a related "pattern stmt"
7749 (introduced by the vectorizer during the pattern recognition
7750 pass). Free the pattern's stmt_vec_info and the def stmts' stmt_vec_infos
7751 too. */
7752 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7753 {
7754 stmt_vec_info patt_info
7755 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7756 if (patt_info)
7757 {
7758 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7759 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7760 gimple_set_bb (patt_stmt, NULL);
7761 tree lhs = gimple_get_lhs (patt_stmt);
7762 if (TREE_CODE (lhs) == SSA_NAME)
7763 release_ssa_name (lhs);
7764 if (seq)
7765 {
7766 gimple_stmt_iterator si;
7767 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7768 {
7769 gimple seq_stmt = gsi_stmt (si);
7770 gimple_set_bb (seq_stmt, NULL);
7771 lhs = gimple_get_lhs (seq_stmt);
7772 if (TREE_CODE (lhs) == SSA_NAME)
7773 release_ssa_name (lhs);
7774 free_stmt_vec_info (seq_stmt);
7775 }
7776 }
7777 free_stmt_vec_info (patt_stmt);
7778 }
7779 }
7780
7781 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7782 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7783 set_vinfo_for_stmt (stmt, NULL);
7784 free (stmt_info);
7785 }
7786
7787
7788 /* Function get_vectype_for_scalar_type_and_size.
7789
7790 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7791 by the target. */
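/* For example (illustrative; the resulting mode is target-dependent):
   SCALAR_TYPE == int (a 4-byte SImode scalar) with SIZE == 16 yields a
   4-element vector type such as V4SI, while SIZE == 0 defers to
   targetm.vectorize.preferred_simd_mode.  */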
7792
7793 static tree
7794 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7795 {
7796 machine_mode inner_mode = TYPE_MODE (scalar_type);
7797 machine_mode simd_mode;
7798 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7799 int nunits;
7800 tree vectype;
7801
7802 if (nbytes == 0)
7803 return NULL_TREE;
7804
7805 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7806 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7807 return NULL_TREE;
7808
7809 /* For vector types of elements whose mode precision doesn't
7810 match their type's precision we use an element type of mode
7811 precision. The vectorization routines will have to make sure
7812 they support the proper result truncation/extension.
7813 We also make sure to build vector types with INTEGER_TYPE
7814 component type only. */
7815 if (INTEGRAL_TYPE_P (scalar_type)
7816 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7817 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7818 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7819 TYPE_UNSIGNED (scalar_type));
7820
7821 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7822 When the component mode passes the above test simply use a type
7823 corresponding to that mode. The theory is that any use that
7824 would cause problems with this will disable vectorization anyway. */
7825 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7826 && !INTEGRAL_TYPE_P (scalar_type))
7827 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7828
7829 /* We can't build a vector type of elements with alignment bigger than
7830 their size. */
7831 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7832 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7833 TYPE_UNSIGNED (scalar_type));
7834
7835 /* If we fell back to using the mode, fail if there was
7836 no scalar type for it. */
7837 if (scalar_type == NULL_TREE)
7838 return NULL_TREE;
7839
7840 /* If no size was supplied use the mode the target prefers. Otherwise
7841 lookup a vector mode of the specified size. */
7842 if (size == 0)
7843 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7844 else
7845 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7846 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7847 if (nunits <= 1)
7848 return NULL_TREE;
7849
7850 vectype = build_vector_type (scalar_type, nunits);
7851
7852 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7853 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7854 return NULL_TREE;
7855
7856 return vectype;
7857 }
7858
7859 unsigned int current_vector_size;
7860
7861 /* Function get_vectype_for_scalar_type.
7862
7863 Returns the vector type corresponding to SCALAR_TYPE as supported
7864 by the target. */
7865
7866 tree
7867 get_vectype_for_scalar_type (tree scalar_type)
7868 {
7869 tree vectype;
7870 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7871 current_vector_size);
7872 if (vectype
7873 && current_vector_size == 0)
7874 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7875 return vectype;
7876 }
7877
7878 /* Function get_same_sized_vectype
7879
7880 Returns a vector type corresponding to SCALAR_TYPE of size
7881 VECTOR_TYPE if supported by the target. */
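/* For example (illustrative only): with SCALAR_TYPE == int and
   VECTOR_TYPE a 16-byte float vector such as V4SF, this returns the
   16-byte integer vector type V4SI, provided the target supports it.  */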
7882
7883 tree
7884 get_same_sized_vectype (tree scalar_type, tree vector_type)
7885 {
7886 return get_vectype_for_scalar_type_and_size
7887 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7888 }
7889
7890 /* Function vect_is_simple_use.
7891
7892 Input:
7893 LOOP_VINFO - the vect info of the loop that is being vectorized.
7894 BB_VINFO - the vect info of the basic block that is being vectorized.
7895 OPERAND - operand of STMT in the loop or bb.
7896 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7897
7898 Returns whether a stmt with OPERAND can be vectorized.
7899 For loops, supportable operands are constants, loop invariants, and operands
7900 that are defined by the current iteration of the loop. Unsupportable
7901 operands are those that are defined by a previous iteration of the loop (as
7902 is the case in reduction/induction computations).
7903 For basic blocks, supportable operands are constants and bb invariants.
7904 For now, operands defined outside the basic block are not supported. */
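/* A typical use (illustrative only) for a statement  x_1 = y_2 + z_3:

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;
     if (!vect_is_simple_use (y_2, stmt, loop_vinfo, NULL,
                              &def_stmt, &def, &dt))
       return false;

   DT then distinguishes vect_constant_def, vect_external_def,
   vect_internal_def, etc., which determines how the corresponding
   vector operand is obtained.  */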
7905
7906 bool
7907 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7908 bb_vec_info bb_vinfo, gimple *def_stmt,
7909 tree *def, enum vect_def_type *dt)
7910 {
7911 *def_stmt = NULL;
7912 *def = NULL_TREE;
7913 *dt = vect_unknown_def_type;
7914
7915 if (dump_enabled_p ())
7916 {
7917 dump_printf_loc (MSG_NOTE, vect_location,
7918 "vect_is_simple_use: operand ");
7919 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7920 dump_printf (MSG_NOTE, "\n");
7921 }
7922
7923 if (CONSTANT_CLASS_P (operand))
7924 {
7925 *dt = vect_constant_def;
7926 return true;
7927 }
7928
7929 if (is_gimple_min_invariant (operand))
7930 {
7931 *def = operand;
7932 *dt = vect_external_def;
7933 return true;
7934 }
7935
7936 if (TREE_CODE (operand) != SSA_NAME)
7937 {
7938 if (dump_enabled_p ())
7939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7940 "not ssa-name.\n");
7941 return false;
7942 }
7943
7944 if (SSA_NAME_IS_DEFAULT_DEF (operand))
7945 {
7946 *def = operand;
7947 *dt = vect_external_def;
7948 return true;
7949 }
7950
7951 *def_stmt = SSA_NAME_DEF_STMT (operand);
7952 if (dump_enabled_p ())
7953 {
7954 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7955 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7956 }
7957
7958 basic_block bb = gimple_bb (*def_stmt);
7959 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
7960 || (bb_vinfo
7961 && (bb != BB_VINFO_BB (bb_vinfo)
7962 || gimple_code (*def_stmt) == GIMPLE_PHI)))
7963 *dt = vect_external_def;
7964 else
7965 {
7966 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
7967 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
7968 *dt = vect_external_def;
7969 else
7970 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7971 }
7972
7973 if (dump_enabled_p ())
7974 {
7975 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
7976 switch (*dt)
7977 {
7978 case vect_uninitialized_def:
7979 dump_printf (MSG_NOTE, "uninitialized\n");
7980 break;
7981 case vect_constant_def:
7982 dump_printf (MSG_NOTE, "constant\n");
7983 break;
7984 case vect_external_def:
7985 dump_printf (MSG_NOTE, "external\n");
7986 break;
7987 case vect_internal_def:
7988 dump_printf (MSG_NOTE, "internal\n");
7989 break;
7990 case vect_induction_def:
7991 dump_printf (MSG_NOTE, "induction\n");
7992 break;
7993 case vect_reduction_def:
7994 dump_printf (MSG_NOTE, "reduction\n");
7995 break;
7996 case vect_double_reduction_def:
7997 dump_printf (MSG_NOTE, "double reduction\n");
7998 break;
7999 case vect_nested_cycle:
8000 dump_printf (MSG_NOTE, "nested cycle\n");
8001 break;
8002 case vect_unknown_def_type:
8003 dump_printf (MSG_NOTE, "unknown\n");
8004 break;
8005 }
8006 }
8007
8008 if (*dt == vect_unknown_def_type
8009 || (stmt
8010 && *dt == vect_double_reduction_def
8011 && gimple_code (stmt) != GIMPLE_PHI))
8012 {
8013 if (dump_enabled_p ())
8014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8015 "Unsupported pattern.\n");
8016 return false;
8017 }
8018
8019 switch (gimple_code (*def_stmt))
8020 {
8021 case GIMPLE_PHI:
8022 *def = gimple_phi_result (*def_stmt);
8023 break;
8024
8025 case GIMPLE_ASSIGN:
8026 *def = gimple_assign_lhs (*def_stmt);
8027 break;
8028
8029 case GIMPLE_CALL:
8030 *def = gimple_call_lhs (*def_stmt);
8031 if (*def != NULL)
8032 break;
8033 /* FALLTHRU */
8034 default:
8035 if (dump_enabled_p ())
8036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8037 "unsupported defining stmt:\n");
8038 return false;
8039 }
8040
8041 return true;
8042 }
8043
8044 /* Function vect_is_simple_use_1.
8045
8046 Same as vect_is_simple_use but also determines the vector operand
8047 type of OPERAND and stores it to *VECTYPE. If the definition of
8048 OPERAND is vect_uninitialized_def, vect_constant_def or
8049 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8050 is responsible for computing the best suited vector type for the
8051 scalar operand. */
8052
8053 bool
8054 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8055 bb_vec_info bb_vinfo, gimple *def_stmt,
8056 tree *def, enum vect_def_type *dt, tree *vectype)
8057 {
8058 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8059 def, dt))
8060 return false;
8061
8062 /* Now get a vector type if the def is internal, otherwise supply
8063 NULL_TREE and leave it up to the caller to figure out a proper
8064 type for the use stmt. */
8065 if (*dt == vect_internal_def
8066 || *dt == vect_induction_def
8067 || *dt == vect_reduction_def
8068 || *dt == vect_double_reduction_def
8069 || *dt == vect_nested_cycle)
8070 {
8071 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8072
8073 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8074 && !STMT_VINFO_RELEVANT (stmt_info)
8075 && !STMT_VINFO_LIVE_P (stmt_info))
8076 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8077
8078 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8079 gcc_assert (*vectype != NULL_TREE);
8080 }
8081 else if (*dt == vect_uninitialized_def
8082 || *dt == vect_constant_def
8083 || *dt == vect_external_def)
8084 *vectype = NULL_TREE;
8085 else
8086 gcc_unreachable ();
8087
8088 return true;
8089 }
8090
8091
8092 /* Function supportable_widening_operation
8093
8094 Check whether an operation represented by the code CODE is a
8095 widening operation that is supported by the target platform in
8096 vector form (i.e., when operating on arguments of type VECTYPE_IN
8097 producing a result of type VECTYPE_OUT).
8098
8099 Widening operations we currently support are NOP (CONVERT), FLOAT,
8100 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
8101 by the target platform either directly (via vector tree-codes), or via
8102 target builtins.
8103
8104 Output:
8105 - CODE1 and CODE2 are codes of vector operations to be used when
8106 vectorizing the operation, if available.
8107 - MULTI_STEP_CVT determines the number of required intermediate steps in
8108 case of multi-step conversion (like char->short->int - in that case
8109 MULTI_STEP_CVT will be 1).
8110 - INTERM_TYPES contains the intermediate type required to perform the
8111 widening operation (short in the above example). */
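/* For example (illustrative only): for the widening multiplication

     short a, b;
     int c = a * b;

   with V8HI inputs and V4SI outputs, CODE1/CODE2 would typically be
   VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR (or the EVEN/ODD
   variants when the result feeds only a reduction), with
   MULTI_STEP_CVT == 0.  */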
8112
8113 bool
8114 supportable_widening_operation (enum tree_code code, gimple stmt,
8115 tree vectype_out, tree vectype_in,
8116 enum tree_code *code1, enum tree_code *code2,
8117 int *multi_step_cvt,
8118 vec<tree> *interm_types)
8119 {
8120 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8121 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8122 struct loop *vect_loop = NULL;
8123 machine_mode vec_mode;
8124 enum insn_code icode1, icode2;
8125 optab optab1, optab2;
8126 tree vectype = vectype_in;
8127 tree wide_vectype = vectype_out;
8128 enum tree_code c1, c2;
8129 int i;
8130 tree prev_type, intermediate_type;
8131 machine_mode intermediate_mode, prev_mode;
8132 optab optab3, optab4;
8133
8134 *multi_step_cvt = 0;
8135 if (loop_info)
8136 vect_loop = LOOP_VINFO_LOOP (loop_info);
8137
8138 switch (code)
8139 {
8140 case WIDEN_MULT_EXPR:
8141 /* The result of a vectorized widening operation usually requires
8142 two vectors (because the widened results do not fit into one vector).
8143 The generated vector results would normally be expected to be
8144 generated in the same order as in the original scalar computation,
8145 i.e. if 8 results are generated in each vector iteration, they are
8146 to be organized as follows:
8147 vect1: [res1,res2,res3,res4],
8148 vect2: [res5,res6,res7,res8].
8149
8150 However, in the special case that the result of the widening
8151 operation is used in a reduction computation only, the order doesn't
8152 matter (because when vectorizing a reduction we change the order of
8153 the computation). Some targets can take advantage of this and
8154 generate more efficient code. For example, targets like Altivec,
8155 that support widen_mult using a sequence of {mult_even,mult_odd}
8156 generate the following vectors:
8157 vect1: [res1,res3,res5,res7],
8158 vect2: [res2,res4,res6,res8].
8159
8160 When vectorizing outer-loops, we execute the inner-loop sequentially
8161 (each vectorized inner-loop iteration contributes to VF outer-loop
8162 iterations in parallel). We therefore don't allow changing the
8163 order of the computation in the inner-loop during outer-loop
8164 vectorization. */
8165 /* TODO: Another case in which order doesn't *really* matter is when we
8166 widen and then contract again, e.g. (short)((int)x * y >> 8).
8167 Normally, pack_trunc performs an even/odd permute, whereas the
8168 repack from an even/odd expansion would be an interleave, which
8169 would be significantly simpler for e.g. AVX2. */
8170 /* In any case, in order to avoid duplicating the code below, recurse
8171 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8172 are properly set up for the caller. If we fail, we'll continue with
8173 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8174 if (vect_loop
8175 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8176 && !nested_in_vect_loop_p (vect_loop, stmt)
8177 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8178 stmt, vectype_out, vectype_in,
8179 code1, code2, multi_step_cvt,
8180 interm_types))
8181 {
8182 /* Elements in a vector with vect_used_by_reduction property cannot
8183 be reordered if the use chain with this property does not have the
8184 same operation. One such example is s += a * b, where elements
8185 in a and b cannot be reordered. Here we check if the vector defined
8186 by STMT is only directly used in the reduction statement. */
8187 tree lhs = gimple_assign_lhs (stmt);
8188 use_operand_p dummy;
8189 gimple use_stmt;
8190 stmt_vec_info use_stmt_info = NULL;
8191 if (single_imm_use (lhs, &dummy, &use_stmt)
8192 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8193 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8194 return true;
8195 }
8196 c1 = VEC_WIDEN_MULT_LO_EXPR;
8197 c2 = VEC_WIDEN_MULT_HI_EXPR;
8198 break;
8199
8200 case VEC_WIDEN_MULT_EVEN_EXPR:
8201 /* Support the recursion induced just above. */
8202 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8203 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8204 break;
8205
8206 case WIDEN_LSHIFT_EXPR:
8207 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8208 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8209 break;
8210
8211 CASE_CONVERT:
8212 c1 = VEC_UNPACK_LO_EXPR;
8213 c2 = VEC_UNPACK_HI_EXPR;
8214 break;
8215
8216 case FLOAT_EXPR:
8217 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8218 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8219 break;
8220
8221 case FIX_TRUNC_EXPR:
8222 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8223 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8224 computing the operation. */
8225 return false;
8226
8227 default:
8228 gcc_unreachable ();
8229 }
8230
8231 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8232 {
8233 enum tree_code ctmp = c1;
8234 c1 = c2;
8235 c2 = ctmp;
8236 }
8237
8238 if (code == FIX_TRUNC_EXPR)
8239 {
8240 /* The signedness is determined from output operand. */
8241 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8242 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8243 }
8244 else
8245 {
8246 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8247 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8248 }
8249
8250 if (!optab1 || !optab2)
8251 return false;
8252
8253 vec_mode = TYPE_MODE (vectype);
8254 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8255 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8256 return false;
8257
8258 *code1 = c1;
8259 *code2 = c2;
8260
8261 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8262 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8263 return true;
8264
8265 /* Check if it's a multi-step conversion that can be done using intermediate
8266 types. */
8267
8268 prev_type = vectype;
8269 prev_mode = vec_mode;
8270
8271 if (!CONVERT_EXPR_CODE_P (code))
8272 return false;
8273
8274 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8275 intermediate steps in the promotion sequence. We try
8276 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8277 not. */
8278 interm_types->create (MAX_INTERM_CVT_STEPS);
8279 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8280 {
8281 intermediate_mode = insn_data[icode1].operand[0].mode;
8282 intermediate_type
8283 = lang_hooks.types.type_for_mode (intermediate_mode,
8284 TYPE_UNSIGNED (prev_type));
8285 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8286 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8287
8288 if (!optab3 || !optab4
8289 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8290 || insn_data[icode1].operand[0].mode != intermediate_mode
8291 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8292 || insn_data[icode2].operand[0].mode != intermediate_mode
8293 || ((icode1 = optab_handler (optab3, intermediate_mode))
8294 == CODE_FOR_nothing)
8295 || ((icode2 = optab_handler (optab4, intermediate_mode))
8296 == CODE_FOR_nothing))
8297 break;
8298
8299 interm_types->quick_push (intermediate_type);
8300 (*multi_step_cvt)++;
8301
8302 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8303 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8304 return true;
8305
8306 prev_type = intermediate_type;
8307 prev_mode = intermediate_mode;
8308 }
8309
8310 interm_types->release ();
8311 return false;
8312 }
8313
8314
8315 /* Function supportable_narrowing_operation
8316
8317 Check whether an operation represented by the code CODE is a
8318 narrowing operation that is supported by the target platform in
8319 vector form (i.e., when operating on arguments of type VECTYPE_IN
8320 and producing a result of type VECTYPE_OUT).
8321
8322 Narrowing operations we currently support are NOP (CONVERT) and
8323 FIX_TRUNC. This function checks if these operations are supported by
8324 the target platform directly via vector tree-codes.
8325
8326 Output:
8327 - CODE1 is the code of a vector operation to be used when
8328 vectorizing the operation, if available.
8329 - MULTI_STEP_CVT determines the number of required intermediate steps in
8330 case of multi-step conversion (like int->short->char - in that case
8331 MULTI_STEP_CVT will be 1).
8332 - INTERM_TYPES contains the intermediate type required to perform the
8333 narrowing operation (short in the above example). */
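/* For example (illustrative only): for an int -> char conversion with
   V4SI inputs and V16QI outputs, CODE1 is VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type:
   two V4SI vectors are first packed into one V8HI, and two V8HI
   vectors are then packed into one V16QI.  */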
8334
8335 bool
8336 supportable_narrowing_operation (enum tree_code code,
8337 tree vectype_out, tree vectype_in,
8338 enum tree_code *code1, int *multi_step_cvt,
8339 vec<tree> *interm_types)
8340 {
8341 machine_mode vec_mode;
8342 enum insn_code icode1;
8343 optab optab1, interm_optab;
8344 tree vectype = vectype_in;
8345 tree narrow_vectype = vectype_out;
8346 enum tree_code c1;
8347 tree intermediate_type;
8348 machine_mode intermediate_mode, prev_mode;
8349 int i;
8350 bool uns;
8351
8352 *multi_step_cvt = 0;
8353 switch (code)
8354 {
8355 CASE_CONVERT:
8356 c1 = VEC_PACK_TRUNC_EXPR;
8357 break;
8358
8359 case FIX_TRUNC_EXPR:
8360 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8361 break;
8362
8363 case FLOAT_EXPR:
8364 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8365 tree code and optabs used for computing the operation. */
8366 return false;
8367
8368 default:
8369 gcc_unreachable ();
8370 }
8371
8372 if (code == FIX_TRUNC_EXPR)
8373 /* The signedness is determined from output operand. */
8374 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8375 else
8376 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8377
8378 if (!optab1)
8379 return false;
8380
8381 vec_mode = TYPE_MODE (vectype);
8382 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8383 return false;
8384
8385 *code1 = c1;
8386
8387 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8388 return true;
8389
8390 /* Check if it's a multi-step conversion that can be done using intermediate
8391 types. */
8392 prev_mode = vec_mode;
8393 if (code == FIX_TRUNC_EXPR)
8394 uns = TYPE_UNSIGNED (vectype_out);
8395 else
8396 uns = TYPE_UNSIGNED (vectype);
8397
8398 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8399 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8400 costly than signed. */
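      /* E.g. a double -> unsigned int narrowing may instead use the
	 signed double -> int packing when both variants produce the
	 same vector mode; the remaining VEC_PACK_TRUNC_EXPR steps then
	 use signed intermediate types (illustrative only).  */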
8401 if (code == FIX_TRUNC_EXPR && uns)
8402 {
8403 enum insn_code icode2;
8404
8405 intermediate_type
8406 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8407 interm_optab
8408 = optab_for_tree_code (c1, intermediate_type, optab_default);
8409 if (interm_optab != unknown_optab
8410 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8411 && insn_data[icode1].operand[0].mode
8412 == insn_data[icode2].operand[0].mode)
8413 {
8414 uns = false;
8415 optab1 = interm_optab;
8416 icode1 = icode2;
8417 }
8418 }
8419
8420 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8421 intermediate steps in the narrowing sequence. We try
8422 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8423 interm_types->create (MAX_INTERM_CVT_STEPS);
8424 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8425 {
8426 intermediate_mode = insn_data[icode1].operand[0].mode;
8427 intermediate_type
8428 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8429 interm_optab
8430 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8431 optab_default);
8432 if (!interm_optab
8433 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8434 || insn_data[icode1].operand[0].mode != intermediate_mode
8435 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8436 == CODE_FOR_nothing))
8437 break;
8438
8439 interm_types->quick_push (intermediate_type);
8440 (*multi_step_cvt)++;
8441
8442 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8443 return true;
8444
8445 prev_mode = intermediate_mode;
8446 optab1 = interm_optab;
8447 }
8448
8449 interm_types->release ();
8450 return false;
8451 }