gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "gimple.h"
33 #include "gimple-ssa.h"
34 #include "tree-cfg.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop-manip.h"
39 #include "cfgloop.h"
40 #include "expr.h"
41 #include "recog.h" /* FIXME: for insn_data */
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "tree-vectorizer.h"
45 #include "dumpfile.h"
46
47 /* For lang_hooks.types.type_for_mode. */
48 #include "langhooks.h"
49
50 /* Return the vectorized type for the given statement. */
51
52 tree
53 stmt_vectype (struct _stmt_vec_info *stmt_info)
54 {
55 return STMT_VINFO_VECTYPE (stmt_info);
56 }
57
58 /* Return TRUE iff the given statement is in an inner loop relative to
59 the loop being vectorized. */
60 bool
61 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
62 {
63 gimple stmt = STMT_VINFO_STMT (stmt_info);
64 basic_block bb = gimple_bb (stmt);
65 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
66 struct loop* loop;
67
68 if (!loop_vinfo)
69 return false;
70
71 loop = LOOP_VINFO_LOOP (loop_vinfo);
72
73 return (bb->loop_father == loop->inner);
74 }
75
76 /* Record the cost of a statement, either by directly informing the
77 target model or by saving it in a vector for later processing.
78 Return a preliminary estimate of the statement's cost. */
79
80 unsigned
81 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
82 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
83 int misalign, enum vect_cost_model_location where)
84 {
85 if (body_cost_vec)
86 {
87 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
88 add_stmt_info_to_vec (body_cost_vec, count, kind,
89 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
90 misalign);
91 return (unsigned)
92 (builtin_vectorization_cost (kind, vectype, misalign) * count);
93
94 }
95 else
96 {
97 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
98 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
99 void *target_cost_data;
100
101 if (loop_vinfo)
102 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
103 else
104 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
105
106 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
107 misalign, where);
108 }
109 }
110
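/* Illustrative usage sketch (not code from this file): a caller that is
   still costing a candidate typically passes a cost vector so the entries
   can be replayed to the target later, e.g.

     stmt_vector_for_cost body_cost_vec = vNULL;
     unsigned estimate
       = record_stmt_cost (&body_cost_vec, ncopies, vector_stmt,
                           stmt_info, 0, vect_body);

   whereas passing a NULL vector charges the cost to the target's cost
   data immediately through add_stmt_cost.  The ncopies and stmt_info
   variables above are assumed to come from the caller's context.  */
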
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
112
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
115 {
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
118 }
119
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
124
125 static tree
126 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
127 tree array, unsigned HOST_WIDE_INT n)
128 {
129 tree vect_type, vect, vect_name, array_ref;
130 gimple new_stmt;
131
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
138
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
143
144 return vect_name;
145 }
146
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
150
151 static void
152 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
153 tree array, unsigned HOST_WIDE_INT n)
154 {
155 tree array_ref;
156 gimple new_stmt;
157
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
161
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
164 }
165
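/* Sketch of the GIMPLE the helpers above emit (illustrative only).  For
   an array created by create_vector_array with NELEMS == 2, a pair of
   writes followed by a read produce roughly:

     vect_array[0] = vect_src_1;     <-- write_vector_array (..., 0)
     vect_array[1] = vect_src_2;     <-- write_vector_array (..., 1)
     vect_dest_3 = vect_array[0];    <-- read_vector_array (..., 0)

   i.e. plain ARRAY_REF stores and loads on the temporary array, as used
   by the load/store-lanes code paths later in this file.  */
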
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
168 (and its group). */
169
170 static tree
171 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
172 {
173 tree mem_ref, alias_ptr_type;
174
175 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
180 }
181
182 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
183
184 /* Function vect_mark_relevant.
185
186 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
187
188 static void
189 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
190 enum vect_relevant relevant, bool live_p,
191 bool used_in_pattern)
192 {
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple pattern_stmt;
197
198 if (dump_enabled_p ())
199 dump_printf_loc (MSG_NOTE, vect_location,
200 "mark relevant %d, live %d.\n", relevant, live_p);
201
202 /* If this stmt is an original stmt in a pattern, we might need to mark its
203 related pattern stmt instead of the original stmt. However, such stmts
204 may have their own uses that are not in any pattern; in such cases the
205 stmt itself should be marked. */
206 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
207 {
208 bool found = false;
209 if (!used_in_pattern)
210 {
211 imm_use_iterator imm_iter;
212 use_operand_p use_p;
213 gimple use_stmt;
214 tree lhs;
215 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
216 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
217
218 if (is_gimple_assign (stmt))
219 lhs = gimple_assign_lhs (stmt);
220 else
221 lhs = gimple_call_lhs (stmt);
222
223 /* This use is outside the pattern; if LHS has other uses that are
224 pattern uses, we should mark the stmt itself, and not the pattern
225 stmt. */
226 if (TREE_CODE (lhs) == SSA_NAME)
227 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
228 {
229 if (is_gimple_debug (USE_STMT (use_p)))
230 continue;
231 use_stmt = USE_STMT (use_p);
232
233 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
234 continue;
235
236 if (vinfo_for_stmt (use_stmt)
237 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
238 {
239 found = true;
240 break;
241 }
242 }
243 }
244
245 if (!found)
246 {
247 /* This is the last stmt in a sequence that was detected as a
248 pattern that can potentially be vectorized. Don't mark the stmt
249 as relevant/live because it's not going to be vectorized.
250 Instead mark the pattern-stmt that replaces it. */
251
252 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
253
254 if (dump_enabled_p ())
255 dump_printf_loc (MSG_NOTE, vect_location,
256 "last stmt in pattern. don't mark"
257 " relevant/live.\n");
258 stmt_info = vinfo_for_stmt (pattern_stmt);
259 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
260 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
261 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
262 stmt = pattern_stmt;
263 }
264 }
265
266 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
267 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
268 STMT_VINFO_RELEVANT (stmt_info) = relevant;
269
270 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
271 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
272 {
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "already marked relevant/live.\n");
276 return;
277 }
278
279 worklist->safe_push (stmt);
280 }
281
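/* Example of the pattern handling above (illustrative).  If, say, the
   widening-multiplication pattern replaced

     S2: prod = (int) t_short * (int) b_short;

   by a pattern stmt computing WIDEN_MULT_EXPR <t_short, b_short>, then
   marking S2 itself would be pointless: only the pattern stmt is going
   to be vectorized, so the pattern stmt is marked instead.  Only when
   the LHS of S2 also has uses inside other pattern stmts is S2 itself
   kept relevant (the "found" case above).  */
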
282
283 /* Function vect_stmt_relevant_p.
284
285 Return true if STMT in loop that is represented by LOOP_VINFO is
286 "relevant for vectorization".
287
288 A stmt is considered "relevant for vectorization" if:
289 - it has uses outside the loop.
290 - it has vdefs (it alters memory).
291 - it is a control stmt in the loop (except for the exit condition).
292
293 CHECKME: what other side effects would the vectorizer allow? */
294
295 static bool
296 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
297 enum vect_relevant *relevant, bool *live_p)
298 {
299 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
300 ssa_op_iter op_iter;
301 imm_use_iterator imm_iter;
302 use_operand_p use_p;
303 def_operand_p def_p;
304
305 *relevant = vect_unused_in_scope;
306 *live_p = false;
307
308 /* cond stmt other than loop exit cond. */
309 if (is_ctrl_stmt (stmt)
310 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
311 != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
313
314 /* changing memory. */
315 if (gimple_code (stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt))
317 {
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant = vect_used_in_scope;
322 }
323
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
326 {
327 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
328 {
329 basic_block bb = gimple_bb (USE_STMT (use_p));
330 if (!flow_bb_inside_loop_p (loop, bb))
331 {
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE, vect_location,
334 "vec_stmt_relevant_p: used out of loop.\n");
335
336 if (is_gimple_debug (USE_STMT (use_p)))
337 continue;
338
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop-closed SSA form). */
341 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 gcc_assert (bb == single_exit (loop)->dest);
343
344 *live_p = true;
345 }
346 }
347 }
348
349 return (*live_p || *relevant);
350 }
351
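/* Small example for the checks above (illustrative):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + c[i];   <-- alters memory (vdef): *relevant is set
         s = s + b[i];
       }
     ... = s;                  <-- use outside the loop: *live_p is set

   In loop-closed SSA form the outside use of s appears in a PHI in the
   single exit block, which is what the asserts above rely on.  */
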
352
353 /* Function exist_non_indexing_operands_for_use_p
354
355 USE is one of the uses attached to STMT. Check if USE is
356 used in STMT for anything other than indexing an array. */
357
358 static bool
359 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
360 {
361 tree operand;
362 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
363
364 /* USE corresponds to some operand in STMT. If there is no data
365 reference in STMT, then any operand that corresponds to USE
366 is not indexing an array. */
367 if (!STMT_VINFO_DATA_REF (stmt_info))
368 return true;
369
370 /* STMT has a data_ref. FORNOW this means that it is in one of
371 the following forms:
372 -1- ARRAY_REF = var
373 -2- var = ARRAY_REF
374 (This should have been verified in analyze_data_refs).
375
376 'var' in the second case corresponds to a def, not a use,
377 so USE cannot correspond to any operands that are not used
378 for array indexing.
379
380 Therefore, all we need to check is if STMT falls into the
381 first case, and whether var corresponds to USE. */
382
383 if (!gimple_assign_copy_p (stmt))
384 return false;
385 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
386 return false;
387 operand = gimple_assign_rhs1 (stmt);
388 if (TREE_CODE (operand) != SSA_NAME)
389 return false;
390
391 if (operand == use)
392 return true;
393
394 return false;
395 }
396
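/* Example (illustrative): for the store

     a[i] = x;

   the use of x is the stored value, so the function returns true for x,
   while the use of i only feeds the address computation and the function
   returns false for i; the definition of i therefore does not need to be
   vectorized on its own account.  */
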
397
398 /*
399 Function process_use.
400
401 Inputs:
402 - a USE in STMT in a loop represented by LOOP_VINFO
403 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
404 that defined USE. This is done by calling mark_relevant and passing it
405 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
406 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
407 be performed.
408
409 Outputs:
410 Generally, LIVE_P and RELEVANT are used to define the liveness and
411 relevance info of the DEF_STMT of this USE:
412 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
413 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
414 Exceptions:
415 - case 1: If USE is used only for address computations (e.g. array indexing),
416 which does not need to be directly vectorized, then the liveness/relevance
417 of the respective DEF_STMT is left unchanged.
418 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
419 skip DEF_STMT because it has already been processed.
420 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
421 be modified accordingly.
422
423 Return true if everything is as expected. Return false otherwise. */
424
425 static bool
426 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
427 enum vect_relevant relevant, vec<gimple> *worklist,
428 bool force)
429 {
430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
431 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
432 stmt_vec_info dstmt_vinfo;
433 basic_block bb, def_bb;
434 tree def;
435 gimple def_stmt;
436 enum vect_def_type dt;
437
438 /* case 1: we are only interested in uses that need to be vectorized. Uses
439 that are used for address computation are not considered relevant. */
440 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
441 return true;
442
443 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
444 {
445 if (dump_enabled_p ())
446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
447 "not vectorized: unsupported use in stmt.\n");
448 return false;
449 }
450
451 if (!def_stmt || gimple_nop_p (def_stmt))
452 return true;
453
454 def_bb = gimple_bb (def_stmt);
455 if (!flow_bb_inside_loop_p (loop, def_bb))
456 {
457 if (dump_enabled_p ())
458 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
459 return true;
460 }
461
462 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
463 DEF_STMT must have already been processed, because this should be the
464 only way that STMT, which is a reduction-phi, was put in the worklist,
465 as there should be no other uses for DEF_STMT in the loop. So we just
466 check that everything is as expected, and we are done. */
467 dstmt_vinfo = vinfo_for_stmt (def_stmt);
468 bb = gimple_bb (stmt);
469 if (gimple_code (stmt) == GIMPLE_PHI
470 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
471 && gimple_code (def_stmt) != GIMPLE_PHI
472 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
473 && bb->loop_father == def_bb->loop_father)
474 {
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_NOTE, vect_location,
477 "reduc-stmt defining reduc-phi in the same nest.\n");
478 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
479 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
480 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
481 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
482 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
483 return true;
484 }
485
486 /* case 3a: outer-loop stmt defining an inner-loop stmt:
487 outer-loop-header-bb:
488 d = def_stmt
489 inner-loop:
490 stmt # use (d)
491 outer-loop-tail-bb:
492 ... */
493 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
494 {
495 if (dump_enabled_p ())
496 dump_printf_loc (MSG_NOTE, vect_location,
497 "outer-loop def-stmt defining inner-loop stmt.\n");
498
499 switch (relevant)
500 {
501 case vect_unused_in_scope:
502 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
503 vect_used_in_scope : vect_unused_in_scope;
504 break;
505
506 case vect_used_in_outer_by_reduction:
507 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
508 relevant = vect_used_by_reduction;
509 break;
510
511 case vect_used_in_outer:
512 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
513 relevant = vect_used_in_scope;
514 break;
515
516 case vect_used_in_scope:
517 break;
518
519 default:
520 gcc_unreachable ();
521 }
522 }
523
524 /* case 3b: inner-loop stmt defining an outer-loop stmt:
525 outer-loop-header-bb:
526 ...
527 inner-loop:
528 d = def_stmt
529 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
530 stmt # use (d) */
531 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
532 {
533 if (dump_enabled_p ())
534 dump_printf_loc (MSG_NOTE, vect_location,
535 "inner-loop def-stmt defining outer-loop stmt.\n");
536
537 switch (relevant)
538 {
539 case vect_unused_in_scope:
540 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
541 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
542 vect_used_in_outer_by_reduction : vect_unused_in_scope;
543 break;
544
545 case vect_used_by_reduction:
546 relevant = vect_used_in_outer_by_reduction;
547 break;
548
549 case vect_used_in_scope:
550 relevant = vect_used_in_outer;
551 break;
552
553 default:
554 gcc_unreachable ();
555 }
556 }
557
558 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
559 is_pattern_stmt_p (stmt_vinfo));
560 return true;
561 }
562
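/* Example for case 3a above (illustrative).  In the nest

     outer-loop:
       d = n * 4;               <-- def_stmt in the outer loop
       inner-loop:
         a[i] += b[i] * d;      <-- use of d in the inner loop

   the relevance propagated to the outer-loop definition is remapped by
   the first switch (e.g. vect_used_in_outer becomes vect_used_in_scope)
   before the definition is pushed on the worklist; case 3b handles the
   mirror situation of an inner-loop definition used after the inner
   loop.  */
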
563
564 /* Function vect_mark_stmts_to_be_vectorized.
565
566 Not all stmts in the loop need to be vectorized. For example:
567
568 for i...
569 for j...
570 1. T0 = i + j
571 2. T1 = a[T0]
572
573 3. j = j + 1
574
575 Stmt 1 and 3 do not need to be vectorized, because loop control and
576 addressing of vectorized data-refs are handled differently.
577
578 This pass detects such stmts. */
579
580 bool
581 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
582 {
583 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
584 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
585 unsigned int nbbs = loop->num_nodes;
586 gimple_stmt_iterator si;
587 gimple stmt;
588 unsigned int i;
589 stmt_vec_info stmt_vinfo;
590 basic_block bb;
591 gimple phi;
592 bool live_p;
593 enum vect_relevant relevant, tmp_relevant;
594 enum vect_def_type def_type;
595
596 if (dump_enabled_p ())
597 dump_printf_loc (MSG_NOTE, vect_location,
598 "=== vect_mark_stmts_to_be_vectorized ===\n");
599
600 stack_vec<gimple, 64> worklist;
601
602 /* 1. Init worklist. */
603 for (i = 0; i < nbbs; i++)
604 {
605 bb = bbs[i];
606 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
607 {
608 phi = gsi_stmt (si);
609 if (dump_enabled_p ())
610 {
611 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
612 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
613 dump_printf (MSG_NOTE, "\n");
614 }
615
616 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
617 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
618 }
619 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
620 {
621 stmt = gsi_stmt (si);
622 if (dump_enabled_p ())
623 {
624 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
625 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
626 dump_printf (MSG_NOTE, "\n");
627 }
628
629 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
630 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
631 }
632 }
633
634 /* 2. Process_worklist */
635 while (worklist.length () > 0)
636 {
637 use_operand_p use_p;
638 ssa_op_iter iter;
639
640 stmt = worklist.pop ();
641 if (dump_enabled_p ())
642 {
643 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
644 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
645 dump_printf (MSG_NOTE, "\n");
646 }
647
648 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
649 (DEF_STMT) as relevant/irrelevant and live/dead according to the
650 liveness and relevance properties of STMT. */
651 stmt_vinfo = vinfo_for_stmt (stmt);
652 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
653 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
654
655 /* Generally, the liveness and relevance properties of STMT are
656 propagated as is to the DEF_STMTs of its USEs:
657 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
658 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
659
660 One exception is when STMT has been identified as defining a reduction
661 variable; in this case we set the liveness/relevance as follows:
662 live_p = false
663 relevant = vect_used_by_reduction
664 This is because we distinguish between two kinds of relevant stmts -
665 those that are used by a reduction computation, and those that are
666 (also) used by a regular computation. This allows us later on to
667 identify stmts that are used solely by a reduction, and therefore the
668 order of the results that they produce does not have to be kept. */
669
670 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
671 tmp_relevant = relevant;
672 switch (def_type)
673 {
674 case vect_reduction_def:
675 switch (tmp_relevant)
676 {
677 case vect_unused_in_scope:
678 relevant = vect_used_by_reduction;
679 break;
680
681 case vect_used_by_reduction:
682 if (gimple_code (stmt) == GIMPLE_PHI)
683 break;
684 /* fall through */
685
686 default:
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "unsupported use of reduction.\n");
690 return false;
691 }
692
693 live_p = false;
694 break;
695
696 case vect_nested_cycle:
697 if (tmp_relevant != vect_unused_in_scope
698 && tmp_relevant != vect_used_in_outer_by_reduction
699 && tmp_relevant != vect_used_in_outer)
700 {
701 if (dump_enabled_p ())
702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
703 "unsupported use of nested cycle.\n");
704
705 return false;
706 }
707
708 live_p = false;
709 break;
710
711 case vect_double_reduction_def:
712 if (tmp_relevant != vect_unused_in_scope
713 && tmp_relevant != vect_used_by_reduction)
714 {
715 if (dump_enabled_p ())
716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
717 "unsupported use of double reduction.\n");
718
719 return false;
720 }
721
722 live_p = false;
723 break;
724
725 default:
726 break;
727 }
728
729 if (is_pattern_stmt_p (stmt_vinfo))
730 {
731 /* Pattern statements are not inserted into the code, so
732 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
733 have to scan the RHS or function arguments instead. */
734 if (is_gimple_assign (stmt))
735 {
736 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
737 tree op = gimple_assign_rhs1 (stmt);
738
739 i = 1;
740 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
741 {
742 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
743 live_p, relevant, &worklist, false)
744 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
745 live_p, relevant, &worklist, false))
746 return false;
747 i = 2;
748 }
749 for (; i < gimple_num_ops (stmt); i++)
750 {
751 op = gimple_op (stmt, i);
752 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
753 &worklist, false))
754 return false;
755 }
756 }
757 else if (is_gimple_call (stmt))
758 {
759 for (i = 0; i < gimple_call_num_args (stmt); i++)
760 {
761 tree arg = gimple_call_arg (stmt, i);
762 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
763 &worklist, false))
764 return false;
765 }
766 }
767 }
768 else
769 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
770 {
771 tree op = USE_FROM_PTR (use_p);
772 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
773 &worklist, false))
774 return false;
775 }
776
777 if (STMT_VINFO_GATHER_P (stmt_vinfo))
778 {
779 tree off;
780 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
781 gcc_assert (decl);
782 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
783 &worklist, true))
784 return false;
785 }
786 } /* while worklist */
787
788 return true;
789 }
790
791
792 /* Function vect_model_simple_cost.
793
794 Models cost for simple operations, i.e. those that only emit ncopies of a
795 single op. Right now, this does not account for multiple insns that could
796 be generated for the single vector op. We will handle that shortly. */
797
798 void
799 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 stmt_vector_for_cost *prologue_cost_vec,
802 stmt_vector_for_cost *body_cost_vec)
803 {
804 int i;
805 int inside_cost = 0, prologue_cost = 0;
806
807 /* The SLP costs were already calculated during SLP tree build. */
808 if (PURE_SLP_STMT (stmt_info))
809 return;
810
811 /* FORNOW: Assuming maximum 2 args per stmts. */
812 for (i = 0; i < 2; i++)
813 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
814 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
815 stmt_info, 0, vect_prologue);
816
817 /* Pass the inside-of-loop statements to the target-specific cost model. */
818 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
819 stmt_info, 0, vect_body);
820
821 if (dump_enabled_p ())
822 dump_printf_loc (MSG_NOTE, vect_location,
823 "vect_model_simple_cost: inside_cost = %d, "
824 "prologue_cost = %d .\n", inside_cost, prologue_cost);
825 }
826
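/* Worked example (illustrative): for a vector addition with ncopies == 2
   and one external (invariant) operand, vect_model_simple_cost records
   one prologue entry for broadcasting the invariant and two body entries
   for the copies; with a unit target cost per vector_stmt this reports
   inside_cost = 2 and prologue_cost = 1.  */
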
827
828 /* Model cost for type demotion and promotion operations. PWR is normally
829 zero for single-step promotions and demotions. It will be one if
830 two-step promotion/demotion is required, and so on. Each additional
831 step doubles the number of instructions required. */
832
833 static void
834 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
835 enum vect_def_type *dt, int pwr)
836 {
837 int i, tmp;
838 int inside_cost = 0, prologue_cost = 0;
839 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
840 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
841 void *target_cost_data;
842
843 /* The SLP costs were already calculated during SLP tree build. */
844 if (PURE_SLP_STMT (stmt_info))
845 return;
846
847 if (loop_vinfo)
848 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
849 else
850 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
851
852 for (i = 0; i < pwr + 1; i++)
853 {
854 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
855 (i + 1) : i;
856 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
857 vec_promote_demote, stmt_info, 0,
858 vect_body);
859 }
860
861 /* FORNOW: Assuming maximum 2 args per stmts. */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
866
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
871 }
872
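/* Worked example of the power-of-two scaling above (illustrative): a
   two-step promotion has PWR == 1, so the loop runs for i = 0, 1 with
   tmp = i + 1 and charges vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote statements to the body cost; the corresponding
   two-step demotion uses tmp = i and charges 1 + 2 = 3.  */
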
873 /* Function vect_cost_group_size
874
875 For grouped load or store, return the group_size only if it is the first
876 load or store of a group, else return 1. This ensures that group size is
877 only returned once per group. */
878
879 static int
880 vect_cost_group_size (stmt_vec_info stmt_info)
881 {
882 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
883
884 if (first_stmt == STMT_VINFO_STMT (stmt_info))
885 return GROUP_SIZE (stmt_info);
886
887 return 1;
888 }
889
890
891 /* Function vect_model_store_cost
892
893 Models cost for stores. In the case of grouped accesses, one access
894 has the overhead of the grouped access attributed to it. */
895
896 void
897 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
898 bool store_lanes_p, enum vect_def_type dt,
899 slp_tree slp_node,
900 stmt_vector_for_cost *prologue_cost_vec,
901 stmt_vector_for_cost *body_cost_vec)
902 {
903 int group_size;
904 unsigned int inside_cost = 0, prologue_cost = 0;
905 struct data_reference *first_dr;
906 gimple first_stmt;
907
908 /* The SLP costs were already calculated during SLP tree build. */
909 if (PURE_SLP_STMT (stmt_info))
910 return;
911
912 if (dt == vect_constant_def || dt == vect_external_def)
913 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
914 stmt_info, 0, vect_prologue);
915
916 /* Grouped access? */
917 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
918 {
919 if (slp_node)
920 {
921 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
922 group_size = 1;
923 }
924 else
925 {
926 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
927 group_size = vect_cost_group_size (stmt_info);
928 }
929
930 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
931 }
932 /* Not a grouped access. */
933 else
934 {
935 group_size = 1;
936 first_dr = STMT_VINFO_DATA_REF (stmt_info);
937 }
938
939 /* We assume that the cost of a single store-lanes instruction is
940 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
941 access is instead being provided by a permute-and-store operation,
942 include the cost of the permutes. */
943 if (!store_lanes_p && group_size > 1)
944 {
945 /* Uses a high and low interleave operation for each needed permute. */
946
947 int nstmts = ncopies * exact_log2 (group_size) * group_size;
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
950
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE, vect_location,
953 "vect_model_store_cost: strided group_size = %d .\n",
954 group_size);
955 }
956
957 /* Costs of the stores. */
958 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
959
960 if (dump_enabled_p ())
961 dump_printf_loc (MSG_NOTE, vect_location,
962 "vect_model_store_cost: inside_cost = %d, "
963 "prologue_cost = %d .\n", inside_cost, prologue_cost);
964 }
965
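/* Worked example of the interleaving count above (illustrative): a
   grouped store with group_size == 4 that cannot use store-lanes needs
   nstmts = ncopies * exact_log2 (4) * 4 = 8 * ncopies vec_perm
   operations, i.e. two levels of high/low interleaving over the four
   vectors of each copy, on top of the stores themselves.  */
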
966
967 /* Calculate cost of DR's memory access. */
968 void
969 vect_get_store_cost (struct data_reference *dr, int ncopies,
970 unsigned int *inside_cost,
971 stmt_vector_for_cost *body_cost_vec)
972 {
973 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
974 gimple stmt = DR_STMT (dr);
975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
976
977 switch (alignment_support_scheme)
978 {
979 case dr_aligned:
980 {
981 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
982 vector_store, stmt_info, 0,
983 vect_body);
984
985 if (dump_enabled_p ())
986 dump_printf_loc (MSG_NOTE, vect_location,
987 "vect_model_store_cost: aligned.\n");
988 break;
989 }
990
991 case dr_unaligned_supported:
992 {
993 /* Here, we assign an additional cost for the unaligned store. */
994 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
995 unaligned_store, stmt_info,
996 DR_MISALIGNMENT (dr), vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: unaligned supported by "
1000 "hardware.\n");
1001 break;
1002 }
1003
1004 case dr_unaligned_unsupported:
1005 {
1006 *inside_cost = VECT_MAX_COST;
1007
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1010 "vect_model_store_cost: unsupported access.\n");
1011 break;
1012 }
1013
1014 default:
1015 gcc_unreachable ();
1016 }
1017 }
1018
1019
1020 /* Function vect_model_load_cost
1021
1022 Models cost for loads. In the case of grouped accesses, the last access
1023 has the overhead of the grouped access attributed to it. Since unaligned
1024 accesses are supported for loads, we also account for the costs of the
1025 access scheme chosen. */
1026
1027 void
1028 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1029 bool load_lanes_p, slp_tree slp_node,
1030 stmt_vector_for_cost *prologue_cost_vec,
1031 stmt_vector_for_cost *body_cost_vec)
1032 {
1033 int group_size;
1034 gimple first_stmt;
1035 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1036 unsigned int inside_cost = 0, prologue_cost = 0;
1037
1038 /* The SLP costs were already calculated during SLP tree build. */
1039 if (PURE_SLP_STMT (stmt_info))
1040 return;
1041
1042 /* Grouped accesses? */
1043 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1044 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1045 {
1046 group_size = vect_cost_group_size (stmt_info);
1047 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1048 }
1049 /* Not a grouped access. */
1050 else
1051 {
1052 group_size = 1;
1053 first_dr = dr;
1054 }
1055
1056 /* We assume that the cost of a single load-lanes instruction is
1057 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1058 access is instead being provided by a load-and-permute operation,
1059 include the cost of the permutes. */
1060 if (!load_lanes_p && group_size > 1)
1061 {
1062 /* Uses even and odd extract operations for each needed permute. */
1063 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1064 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1065 stmt_info, 0, vect_body);
1066
1067 if (dump_enabled_p ())
1068 dump_printf_loc (MSG_NOTE, vect_location,
1069 "vect_model_load_cost: strided group_size = %d .\n",
1070 group_size);
1071 }
1072
1073 /* The loads themselves. */
1074 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1075 {
1076 /* N scalar loads plus gathering them into a vector. */
1077 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1078 inside_cost += record_stmt_cost (body_cost_vec,
1079 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1080 scalar_load, stmt_info, 0, vect_body);
1081 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1082 stmt_info, 0, vect_body);
1083 }
1084 else
1085 vect_get_load_cost (first_dr, ncopies,
1086 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1087 || group_size > 1 || slp_node),
1088 &inside_cost, &prologue_cost,
1089 prologue_cost_vec, body_cost_vec, true);
1090
1091 if (dump_enabled_p ())
1092 dump_printf_loc (MSG_NOTE, vect_location,
1093 "vect_model_load_cost: inside_cost = %d, "
1094 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1095 }
1096
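/* Worked example of the strided-load branch above (illustrative): with a
   four-element vectype and ncopies == 2, STMT_VINFO_STRIDE_LOAD_P costs
   2 * 4 = 8 scalar_load entries plus 2 vec_construct entries, modelling
   eight element loads gathered into two vectors.  */
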
1097
1098 /* Calculate cost of DR's memory access. */
1099 void
1100 vect_get_load_cost (struct data_reference *dr, int ncopies,
1101 bool add_realign_cost, unsigned int *inside_cost,
1102 unsigned int *prologue_cost,
1103 stmt_vector_for_cost *prologue_cost_vec,
1104 stmt_vector_for_cost *body_cost_vec,
1105 bool record_prologue_costs)
1106 {
1107 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1108 gimple stmt = DR_STMT (dr);
1109 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1110
1111 switch (alignment_support_scheme)
1112 {
1113 case dr_aligned:
1114 {
1115 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1116 stmt_info, 0, vect_body);
1117
1118 if (dump_enabled_p ())
1119 dump_printf_loc (MSG_NOTE, vect_location,
1120 "vect_model_load_cost: aligned.\n");
1121
1122 break;
1123 }
1124 case dr_unaligned_supported:
1125 {
1126 /* Here, we assign an additional cost for the unaligned load. */
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 unaligned_load, stmt_info,
1129 DR_MISALIGNMENT (dr), vect_body);
1130
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: unaligned supported by "
1134 "hardware.\n");
1135
1136 break;
1137 }
1138 case dr_explicit_realign:
1139 {
1140 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1141 vector_load, stmt_info, 0, vect_body);
1142 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1143 vec_perm, stmt_info, 0, vect_body);
1144
1145 /* FIXME: If the misalignment remains fixed across the iterations of
1146 the containing loop, the following cost should be added to the
1147 prologue costs. */
1148 if (targetm.vectorize.builtin_mask_for_load)
1149 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1150 stmt_info, 0, vect_body);
1151
1152 if (dump_enabled_p ())
1153 dump_printf_loc (MSG_NOTE, vect_location,
1154 "vect_model_load_cost: explicit realign\n");
1155
1156 break;
1157 }
1158 case dr_explicit_realign_optimized:
1159 {
1160 if (dump_enabled_p ())
1161 dump_printf_loc (MSG_NOTE, vect_location,
1162 "vect_model_load_cost: unaligned software "
1163 "pipelined.\n");
1164
1165 /* Unaligned software pipeline has a load of an address, an initial
1166 load, and possibly a mask operation to "prime" the loop. However,
1167 if this is an access in a group of loads, which provide grouped
1168 access, then the above cost should only be considered for one
1169 access in the group. Inside the loop, there is a load op
1170 and a realignment op. */
1171
1172 if (add_realign_cost && record_prologue_costs)
1173 {
1174 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1175 vector_stmt, stmt_info,
1176 0, vect_prologue);
1177 if (targetm.vectorize.builtin_mask_for_load)
1178 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1179 vector_stmt, stmt_info,
1180 0, vect_prologue);
1181 }
1182
1183 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1184 stmt_info, 0, vect_body);
1185 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1186 stmt_info, 0, vect_body);
1187
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE, vect_location,
1190 "vect_model_load_cost: explicit realign optimized"
1191 "\n");
1192
1193 break;
1194 }
1195
1196 case dr_unaligned_unsupported:
1197 {
1198 *inside_cost = VECT_MAX_COST;
1199
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1202 "vect_model_load_cost: unsupported access.\n");
1203 break;
1204 }
1205
1206 default:
1207 gcc_unreachable ();
1208 }
1209 }
1210
1211 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1212 the loop preheader for the vectorized stmt STMT. */
1213
1214 static void
1215 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1216 {
1217 if (gsi)
1218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1219 else
1220 {
1221 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1222 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1223
1224 if (loop_vinfo)
1225 {
1226 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1227 basic_block new_bb;
1228 edge pe;
1229
1230 if (nested_in_vect_loop_p (loop, stmt))
1231 loop = loop->inner;
1232
1233 pe = loop_preheader_edge (loop);
1234 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1235 gcc_assert (!new_bb);
1236 }
1237 else
1238 {
1239 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1240 basic_block bb;
1241 gimple_stmt_iterator gsi_bb_start;
1242
1243 gcc_assert (bb_vinfo);
1244 bb = BB_VINFO_BB (bb_vinfo);
1245 gsi_bb_start = gsi_after_labels (bb);
1246 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1247 }
1248 }
1249
1250 if (dump_enabled_p ())
1251 {
1252 dump_printf_loc (MSG_NOTE, vect_location,
1253 "created new init_stmt: ");
1254 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1255 dump_printf (MSG_NOTE, "\n");
1256 }
1257 }
1258
1259 /* Function vect_init_vector.
1260
1261 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1262 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1263 a vector type, a vector with all elements equal to VAL is created first.
1264 Place the initialization at BSI if it is not NULL. Otherwise, place the
1265 initialization at the loop preheader.
1266 Return the DEF of INIT_STMT.
1267 It will be used in the vectorization of STMT. */
1268
1269 tree
1270 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1271 {
1272 tree new_var;
1273 gimple init_stmt;
1274 tree vec_oprnd;
1275 tree new_temp;
1276
1277 if (TREE_CODE (type) == VECTOR_TYPE
1278 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1279 {
1280 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1281 {
1282 if (CONSTANT_CLASS_P (val))
1283 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1284 else
1285 {
1286 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1287 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1288 new_temp, val,
1289 NULL_TREE);
1290 vect_init_vector_1 (stmt, init_stmt, gsi);
1291 val = new_temp;
1292 }
1293 }
1294 val = build_vector_from_val (type, val);
1295 }
1296
1297 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1298 init_stmt = gimple_build_assign (new_var, val);
1299 new_temp = make_ssa_name (new_var, init_stmt);
1300 gimple_assign_set_lhs (init_stmt, new_temp);
1301 vect_init_vector_1 (stmt, init_stmt, gsi);
1302 vec_oprnd = gimple_assign_lhs (init_stmt);
1303 return vec_oprnd;
1304 }
1305
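/* Example (illustrative): called with an INTEGER_CST 5 and a four-element
   integer vector type, vect_init_vector emits in the loop preheader
   something like

     cst_1 = { 5, 5, 5, 5 };

   (the temporary name is only illustrative of the "cst_" variable created
   above) and returns the SSA name defined by that stmt; with a non-NULL
   GSI the initialization is emitted at that point instead.  */
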
1306
1307 /* Function vect_get_vec_def_for_operand.
1308
1309 OP is an operand in STMT. This function returns a (vector) def that will be
1310 used in the vectorized stmt for STMT.
1311
1312 In the case that OP is an SSA_NAME which is defined in the loop, then
1313 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1314
1315 In case OP is an invariant or constant, a new stmt that creates a vector def
1316 needs to be introduced. */
1317
1318 tree
1319 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1320 {
1321 tree vec_oprnd;
1322 gimple vec_stmt;
1323 gimple def_stmt;
1324 stmt_vec_info def_stmt_info = NULL;
1325 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1326 unsigned int nunits;
1327 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1328 tree def;
1329 enum vect_def_type dt;
1330 bool is_simple_use;
1331 tree vector_type;
1332
1333 if (dump_enabled_p ())
1334 {
1335 dump_printf_loc (MSG_NOTE, vect_location,
1336 "vect_get_vec_def_for_operand: ");
1337 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1338 dump_printf (MSG_NOTE, "\n");
1339 }
1340
1341 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1342 &def_stmt, &def, &dt);
1343 gcc_assert (is_simple_use);
1344 if (dump_enabled_p ())
1345 {
1346 int loc_printed = 0;
1347 if (def)
1348 {
1349 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1350 loc_printed = 1;
1351 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1352 dump_printf (MSG_NOTE, "\n");
1353 }
1354 if (def_stmt)
1355 {
1356 if (loc_printed)
1357 dump_printf (MSG_NOTE, " def_stmt = ");
1358 else
1359 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1360 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1361 dump_printf (MSG_NOTE, "\n");
1362 }
1363 }
1364
1365 switch (dt)
1366 {
1367 /* Case 1: operand is a constant. */
1368 case vect_constant_def:
1369 {
1370 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1371 gcc_assert (vector_type);
1372 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1373
1374 if (scalar_def)
1375 *scalar_def = op;
1376
1377 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1378 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_NOTE, vect_location,
1380 "Create vector_cst. nunits = %d\n", nunits);
1381
1382 return vect_init_vector (stmt, op, vector_type, NULL);
1383 }
1384
1385 /* Case 2: operand is defined outside the loop - loop invariant. */
1386 case vect_external_def:
1387 {
1388 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1389 gcc_assert (vector_type);
1390
1391 if (scalar_def)
1392 *scalar_def = def;
1393
1394 /* Create 'vec_inv = {inv,inv,..,inv}' */
1395 if (dump_enabled_p ())
1396 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1397
1398 return vect_init_vector (stmt, def, vector_type, NULL);
1399 }
1400
1401 /* Case 3: operand is defined inside the loop. */
1402 case vect_internal_def:
1403 {
1404 if (scalar_def)
1405 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1406
1407 /* Get the def from the vectorized stmt. */
1408 def_stmt_info = vinfo_for_stmt (def_stmt);
1409
1410 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1411 /* Get vectorized pattern statement. */
1412 if (!vec_stmt
1413 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1414 && !STMT_VINFO_RELEVANT (def_stmt_info))
1415 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1416 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1417 gcc_assert (vec_stmt);
1418 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1419 vec_oprnd = PHI_RESULT (vec_stmt);
1420 else if (is_gimple_call (vec_stmt))
1421 vec_oprnd = gimple_call_lhs (vec_stmt);
1422 else
1423 vec_oprnd = gimple_assign_lhs (vec_stmt);
1424 return vec_oprnd;
1425 }
1426
1427 /* Case 4: operand is defined by a loop header phi - reduction */
1428 case vect_reduction_def:
1429 case vect_double_reduction_def:
1430 case vect_nested_cycle:
1431 {
1432 struct loop *loop;
1433
1434 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1435 loop = (gimple_bb (def_stmt))->loop_father;
1436
1437 /* Get the def before the loop */
1438 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1439 return get_initial_def_for_reduction (stmt, op, scalar_def);
1440 }
1441
1442 /* Case 5: operand is defined by loop-header phi - induction. */
1443 case vect_induction_def:
1444 {
1445 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1446
1447 /* Get the def from the vectorized stmt. */
1448 def_stmt_info = vinfo_for_stmt (def_stmt);
1449 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1450 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1451 vec_oprnd = PHI_RESULT (vec_stmt);
1452 else
1453 vec_oprnd = gimple_get_lhs (vec_stmt);
1454 return vec_oprnd;
1455 }
1456
1457 default:
1458 gcc_unreachable ();
1459 }
1460 }
1461
1462
1463 /* Function vect_get_vec_def_for_stmt_copy
1464
1465 Return a vector-def for an operand. This function is used when the
1466 vectorized stmt to be created (by the caller to this function) is a "copy"
1467 created in case the vectorized result cannot fit in one vector, and several
1468 copies of the vector-stmt are required. In this case the vector-def is
1469 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1470 of the stmt that defines VEC_OPRND.
1471 DT is the type of the vector def VEC_OPRND.
1472
1473 Context:
1474 In case the vectorization factor (VF) is bigger than the number
1475 of elements that can fit in a vectype (nunits), we have to generate
1476 more than one vector stmt to vectorize the scalar stmt. This situation
1477 arises when there are multiple data-types operated upon in the loop; the
1478 smallest data-type determines the VF, and as a result, when vectorizing
1479 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1480 vector stmt (each computing a vector of 'nunits' results, and together
1481 computing 'VF' results in each iteration). This function is called when
1482 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1483 which VF=16 and nunits=4, so the number of copies required is 4):
1484
1485 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486
1487 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1488 VS1.1: vx.1 = memref1 VS1.2
1489 VS1.2: vx.2 = memref2 VS1.3
1490 VS1.3: vx.3 = memref3
1491
1492 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1493 VSnew.1: vz1 = vx.1 + ... VSnew.2
1494 VSnew.2: vz2 = vx.2 + ... VSnew.3
1495 VSnew.3: vz3 = vx.3 + ...
1496
1497 The vectorization of S1 is explained in vectorizable_load.
1498 The vectorization of S2:
1499 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1500 the function 'vect_get_vec_def_for_operand' is called to
1501 get the relevant vector-def for each operand of S2. For operand x it
1502 returns the vector-def 'vx.0'.
1503
1504 To create the remaining copies of the vector-stmt (VSnew.j), this
1505 function is called to get the relevant vector-def for each operand. It is
1506 obtained from the respective VS1.j stmt, which is recorded in the
1507 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508
1509 For example, to obtain the vector-def 'vx.1' in order to create the
1510 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1511 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1512 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1513 and return its def ('vx.1').
1514 Overall, to create the above sequence this function will be called 3 times:
1515 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1516 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1517 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518
1519 tree
1520 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 {
1522 gimple vec_stmt_for_operand;
1523 stmt_vec_info def_stmt_info;
1524
1525 /* Do nothing; can reuse same def. */
1526 if (dt == vect_external_def || dt == vect_constant_def )
1527 return vec_oprnd;
1528
1529 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1530 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1531 gcc_assert (def_stmt_info);
1532 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1533 gcc_assert (vec_stmt_for_operand);
1534 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1535 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1536 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1537 else
1538 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1539 return vec_oprnd;
1540 }
1541
1542
1543 /* Get vectorized definitions for the operands to create a copy of an original
1544 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1545
1546 static void
1547 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1548 vec<tree> *vec_oprnds0,
1549 vec<tree> *vec_oprnds1)
1550 {
1551 tree vec_oprnd = vec_oprnds0->pop ();
1552
1553 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1554 vec_oprnds0->quick_push (vec_oprnd);
1555
1556 if (vec_oprnds1 && vec_oprnds1->length ())
1557 {
1558 vec_oprnd = vec_oprnds1->pop ();
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1560 vec_oprnds1->quick_push (vec_oprnd);
1561 }
1562 }
1563
1564
1565 /* Get vectorized definitions for OP0 and OP1.
1566 REDUC_INDEX is the index of reduction operand in case of reduction,
1567 and -1 otherwise. */
1568
1569 void
1570 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1571 vec<tree> *vec_oprnds0,
1572 vec<tree> *vec_oprnds1,
1573 slp_tree slp_node, int reduc_index)
1574 {
1575 if (slp_node)
1576 {
1577 int nops = (op1 == NULL_TREE) ? 1 : 2;
1578 vec<tree> ops;
1579 ops.create (nops);
1580 vec<vec<tree> > vec_defs;
1581 vec_defs.create (nops);
1582
1583 ops.quick_push (op0);
1584 if (op1)
1585 ops.quick_push (op1);
1586
1587 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1588
1589 *vec_oprnds0 = vec_defs[0];
1590 if (op1)
1591 *vec_oprnds1 = vec_defs[1];
1592
1593 ops.release ();
1594 vec_defs.release ();
1595 }
1596 else
1597 {
1598 tree vec_oprnd;
1599
1600 vec_oprnds0->create (1);
1601 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1602 vec_oprnds0->quick_push (vec_oprnd);
1603
1604 if (op1)
1605 {
1606 vec_oprnds1->create (1);
1607 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1608 vec_oprnds1->quick_push (vec_oprnd);
1609 }
1610 }
1611 }
1612
1613
1614 /* Function vect_finish_stmt_generation.
1615
1616 Insert a new stmt. */
1617
1618 void
1619 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1620 gimple_stmt_iterator *gsi)
1621 {
1622 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1623 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1624 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1625
1626 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1627
1628 if (!gsi_end_p (*gsi)
1629 && gimple_has_mem_ops (vec_stmt))
1630 {
1631 gimple at_stmt = gsi_stmt (*gsi);
1632 tree vuse = gimple_vuse (at_stmt);
1633 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1634 {
1635 tree vdef = gimple_vdef (at_stmt);
1636 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1637 /* If we have an SSA vuse and insert a store, update virtual
1638 SSA form to avoid triggering the renamer. Do so only
1639 if we can easily see all uses - which is what almost always
1640 happens with the way vectorized stmts are inserted. */
1641 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1642 && ((is_gimple_assign (vec_stmt)
1643 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1644 || (is_gimple_call (vec_stmt)
1645 && !(gimple_call_flags (vec_stmt)
1646 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1647 {
1648 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1649 gimple_set_vdef (vec_stmt, new_vdef);
1650 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1651 }
1652 }
1653 }
1654 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1655
1656 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1657 bb_vinfo));
1658
1659 if (dump_enabled_p ())
1660 {
1661 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1662 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1663 dump_printf (MSG_NOTE, "\n");
1664 }
1665
1666 gimple_set_location (vec_stmt, gimple_location (stmt));
1667 }
1668
1669 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1670 a function declaration if the target has a vectorized version
1671 of the function, or NULL_TREE if the function cannot be vectorized. */
1672
1673 tree
1674 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1675 {
1676 tree fndecl = gimple_call_fndecl (call);
1677
1678 /* We only handle functions that do not read or clobber memory -- i.e.
1679 const or novops ones. */
1680 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1681 return NULL_TREE;
1682
1683 if (!fndecl
1684 || TREE_CODE (fndecl) != FUNCTION_DECL
1685 || !DECL_BUILT_IN (fndecl))
1686 return NULL_TREE;
1687
1688 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1689 vectype_in);
1690 }
1691
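/* Example (illustrative): for a call to the sqrt builtin with double
   input and output and a two-element vector type, a target providing a
   vectorized variant returns its FUNCTION_DECL from the
   builtin_vectorized_function hook (e.g. a packed-double sqrt builtin on
   x86); a target without one returns NULL_TREE and the call is not
   vectorized this way.  */
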
1692 /* Function vectorizable_call.
1693
1694 Check if STMT performs a function call that can be vectorized.
1695 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1696 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1697 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1698
1699 static bool
1700 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1701 slp_tree slp_node)
1702 {
1703 tree vec_dest;
1704 tree scalar_dest;
1705 tree op, type;
1706 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1707 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1708 tree vectype_out, vectype_in;
1709 int nunits_in;
1710 int nunits_out;
1711 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1712 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1713 tree fndecl, new_temp, def, rhs_type;
1714 gimple def_stmt;
1715 enum vect_def_type dt[3]
1716 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1717 gimple new_stmt = NULL;
1718 int ncopies, j;
1719 vec<tree> vargs = vNULL;
1720 enum { NARROW, NONE, WIDEN } modifier;
1721 size_t i, nargs;
1722 tree lhs;
1723
1724 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1725 return false;
1726
1727 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1728 return false;
1729
1730 /* Is STMT a vectorizable call? */
1731 if (!is_gimple_call (stmt))
1732 return false;
1733
1734 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1735 return false;
1736
1737 if (stmt_can_throw_internal (stmt))
1738 return false;
1739
1740 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1741
1742 /* Process function arguments. */
1743 rhs_type = NULL_TREE;
1744 vectype_in = NULL_TREE;
1745 nargs = gimple_call_num_args (stmt);
1746
1747 /* Bail out if the function has more than three arguments; we do not have
1748 interesting builtin functions to vectorize with more than two arguments
1749 except for fma. A call with no arguments is not handled either. */
1750 if (nargs == 0 || nargs > 3)
1751 return false;
1752
1753 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1754 if (gimple_call_internal_p (stmt)
1755 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1756 {
1757 nargs = 0;
1758 rhs_type = unsigned_type_node;
1759 }
1760
1761 for (i = 0; i < nargs; i++)
1762 {
1763 tree opvectype;
1764
1765 op = gimple_call_arg (stmt, i);
1766
1767 /* We can only handle calls with arguments of the same type. */
1768 if (rhs_type
1769 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1770 {
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1773 "argument types differ.\n");
1774 return false;
1775 }
1776 if (!rhs_type)
1777 rhs_type = TREE_TYPE (op);
1778
1779 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1780 &def_stmt, &def, &dt[i], &opvectype))
1781 {
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1784 "use not simple.\n");
1785 return false;
1786 }
1787
1788 if (!vectype_in)
1789 vectype_in = opvectype;
1790 else if (opvectype
1791 && opvectype != vectype_in)
1792 {
1793 if (dump_enabled_p ())
1794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1795 "argument vector types differ.\n");
1796 return false;
1797 }
1798 }
1799 /* If all arguments are external or constant defs, use a vector type with
1800 the same size as the output vector type. */
1801 if (!vectype_in)
1802 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1803 if (vec_stmt)
1804 gcc_assert (vectype_in);
1805 if (!vectype_in)
1806 {
1807 if (dump_enabled_p ())
1808 {
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "no vectype for scalar type ");
1811 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1812 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1813 }
1814
1815 return false;
1816 }
1817
1818 /* FORNOW */
1819 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1820 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1821 if (nunits_in == nunits_out / 2)
1822 modifier = NARROW;
1823 else if (nunits_out == nunits_in)
1824 modifier = NONE;
1825 else if (nunits_out == nunits_in / 2)
1826 modifier = WIDEN;
1827 else
1828 return false;
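  /* Worked example (the modes are illustrative): a call taking double
     arguments and producing int results, with vectype_in = V2DF and
     vectype_out = V4SI, has nunits_in = 2 and nunits_out = 4, so
     nunits_in == nunits_out / 2 and modifier = NARROW: two argument
     vectors are consumed for every result vector.  Equal unit counts give
     the common NONE case; the WIDEN case is rejected below because no
     current target implements it.  */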
1829
1830 /* For now, we only vectorize functions if a target specific builtin
1831 is available. TODO -- in some cases, it might be profitable to
1832 insert the calls for pieces of the vector, in order to be able
1833 to vectorize other operations in the loop. */
1834 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1835 if (fndecl == NULL_TREE)
1836 {
1837 if (gimple_call_internal_p (stmt)
1838 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
1839 && !slp_node
1840 && loop_vinfo
1841 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1842 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
1843 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1844 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
1845 {
1846 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1847 { 0, 1, 2, ... vf - 1 } vector. */
1848 gcc_assert (nargs == 0);
1849 }
1850 else
1851 {
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1854 "function is not vectorizable.\n");
1855 return false;
1856 }
1857 }
1858
1859 gcc_assert (!gimple_vuse (stmt));
1860
1861 if (slp_node || PURE_SLP_STMT (stmt_info))
1862 ncopies = 1;
1863 else if (modifier == NARROW)
1864 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1865 else
1866 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1867
1868 /* Sanity check: make sure that at least one copy of the vectorized stmt
1869 needs to be generated. */
1870 gcc_assert (ncopies >= 1);
1871
1872 if (!vec_stmt) /* transformation not required. */
1873 {
1874 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
1877 "\n");
1878 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1879 return true;
1880 }
1881
1882 /** Transform. **/
1883
1884 if (dump_enabled_p ())
1885 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
1886
1887 /* Handle def. */
1888 scalar_dest = gimple_call_lhs (stmt);
1889 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1890
1891 prev_stmt_info = NULL;
1892 switch (modifier)
1893 {
1894 case NONE:
1895 for (j = 0; j < ncopies; ++j)
1896 {
1897 /* Build argument list for the vectorized call. */
1898 if (j == 0)
1899 vargs.create (nargs);
1900 else
1901 vargs.truncate (0);
1902
1903 if (slp_node)
1904 {
1905 vec<vec<tree> > vec_defs;
1906 vec_defs.create (nargs);
1907 vec<tree> vec_oprnds0;
1908
1909 for (i = 0; i < nargs; i++)
1910 vargs.quick_push (gimple_call_arg (stmt, i));
1911 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1912 vec_oprnds0 = vec_defs[0];
1913
1914 /* Arguments are ready. Create the new vector stmt. */
1915 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1916 {
1917 size_t k;
1918 for (k = 0; k < nargs; k++)
1919 {
1920 vec<tree> vec_oprndsk = vec_defs[k];
1921 vargs[k] = vec_oprndsk[i];
1922 }
1923 new_stmt = gimple_build_call_vec (fndecl, vargs);
1924 new_temp = make_ssa_name (vec_dest, new_stmt);
1925 gimple_call_set_lhs (new_stmt, new_temp);
1926 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1927 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1928 }
1929
1930 for (i = 0; i < nargs; i++)
1931 {
1932 vec<tree> vec_oprndsi = vec_defs[i];
1933 vec_oprndsi.release ();
1934 }
1935 vec_defs.release ();
1936 continue;
1937 }
1938
1939 for (i = 0; i < nargs; i++)
1940 {
1941 op = gimple_call_arg (stmt, i);
1942 if (j == 0)
1943 vec_oprnd0
1944 = vect_get_vec_def_for_operand (op, stmt, NULL);
1945 else
1946 {
1947 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1948 vec_oprnd0
1949 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1950 }
1951
1952 vargs.quick_push (vec_oprnd0);
1953 }
1954
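	  /* For IFN_GOMP_SIMD_LANE no call is emitted at all; the result is
	     materialized as a constant lane-index vector.  Illustrative
	     values (assuming a V4SI result): copy j == 0 is initialized to
	     { 0, 1, 2, 3 } and copy j == 1 to { 4, 5, 6, 7 }, i.e. element
	     k of copy j is j * nunits_out + k, exactly as built below.  */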
1955 if (gimple_call_internal_p (stmt)
1956 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1957 {
1958 tree *v = XALLOCAVEC (tree, nunits_out);
1959 int k;
1960 for (k = 0; k < nunits_out; ++k)
1961 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
1962 tree cst = build_vector (vectype_out, v);
1963 tree new_var
1964 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
1965 gimple init_stmt = gimple_build_assign (new_var, cst);
1966 new_temp = make_ssa_name (new_var, init_stmt);
1967 gimple_assign_set_lhs (init_stmt, new_temp);
1968 vect_init_vector_1 (stmt, init_stmt, NULL);
1969 new_temp = make_ssa_name (vec_dest, NULL);
1970 new_stmt = gimple_build_assign (new_temp,
1971 gimple_assign_lhs (init_stmt));
1972 }
1973 else
1974 {
1975 new_stmt = gimple_build_call_vec (fndecl, vargs);
1976 new_temp = make_ssa_name (vec_dest, new_stmt);
1977 gimple_call_set_lhs (new_stmt, new_temp);
1978 }
1979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1980
1981 if (j == 0)
1982 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1983 else
1984 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1985
1986 prev_stmt_info = vinfo_for_stmt (new_stmt);
1987 }
1988
1989 break;
1990
1991 case NARROW:
1992 for (j = 0; j < ncopies; ++j)
1993 {
1994 /* Build argument list for the vectorized call. */
1995 if (j == 0)
1996 vargs.create (nargs * 2);
1997 else
1998 vargs.truncate (0);
1999
2000 if (slp_node)
2001 {
2002 vec<vec<tree> > vec_defs;
2003 vec_defs.create (nargs);
2004 vec<tree> vec_oprnds0;
2005
2006 for (i = 0; i < nargs; i++)
2007 vargs.quick_push (gimple_call_arg (stmt, i));
2008 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2009 vec_oprnds0 = vec_defs[0];
2010
2011 /* Arguments are ready. Create the new vector stmt. */
2012 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2013 {
2014 size_t k;
2015 vargs.truncate (0);
2016 for (k = 0; k < nargs; k++)
2017 {
2018 vec<tree> vec_oprndsk = vec_defs[k];
2019 vargs.quick_push (vec_oprndsk[i]);
2020 vargs.quick_push (vec_oprndsk[i + 1]);
2021 }
2022 new_stmt = gimple_build_call_vec (fndecl, vargs);
2023 new_temp = make_ssa_name (vec_dest, new_stmt);
2024 gimple_call_set_lhs (new_stmt, new_temp);
2025 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2026 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2027 }
2028
2029 for (i = 0; i < nargs; i++)
2030 {
2031 vec<tree> vec_oprndsi = vec_defs[i];
2032 vec_oprndsi.release ();
2033 }
2034 vec_defs.release ();
2035 continue;
2036 }
2037
2038 for (i = 0; i < nargs; i++)
2039 {
2040 op = gimple_call_arg (stmt, i);
2041 if (j == 0)
2042 {
2043 vec_oprnd0
2044 = vect_get_vec_def_for_operand (op, stmt, NULL);
2045 vec_oprnd1
2046 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2047 }
2048 else
2049 {
2050 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2051 vec_oprnd0
2052 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2053 vec_oprnd1
2054 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2055 }
2056
2057 vargs.quick_push (vec_oprnd0);
2058 vargs.quick_push (vec_oprnd1);
2059 }
2060
2061 new_stmt = gimple_build_call_vec (fndecl, vargs);
2062 new_temp = make_ssa_name (vec_dest, new_stmt);
2063 gimple_call_set_lhs (new_stmt, new_temp);
2064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2065
2066 if (j == 0)
2067 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2068 else
2069 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2070
2071 prev_stmt_info = vinfo_for_stmt (new_stmt);
2072 }
2073
2074 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2075
2076 break;
2077
2078 case WIDEN:
2079 /* No current target implements this case. */
2080 return false;
2081 }
2082
2083 vargs.release ();
2084
2085 /* Update the exception handling table with the vector stmt if necessary. */
2086 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2087 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2088
2089 /* The call in STMT might prevent it from being removed in DCE.
2090 We, however, cannot remove it here, due to the way the SSA name
2091 it defines is mapped to the new definition. So just replace the
2092 rhs of the statement with something harmless. */
2093
2094 if (slp_node)
2095 return true;
2096
2097 type = TREE_TYPE (scalar_dest);
2098 if (is_pattern_stmt_p (stmt_info))
2099 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2100 else
2101 lhs = gimple_call_lhs (stmt);
2102 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2103 set_vinfo_for_stmt (new_stmt, stmt_info);
2104 set_vinfo_for_stmt (stmt, NULL);
2105 STMT_VINFO_STMT (stmt_info) = new_stmt;
2106 gsi_replace (gsi, new_stmt, false);
2107 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2108
2109 return true;
2110 }
2111
2112
2113 /* Function vect_gen_widened_results_half
2114
2115 Create a vector stmt whose code is CODE, whose number of operands is
2116 given by OP_TYPE and whose result variable is VEC_DEST; its arguments
2117 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2118 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2119 needs to be created (DECL is a function-decl of a target-builtin).
2120 STMT is the original scalar stmt that we are vectorizing. */
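/* Illustrative use (modes and tree codes are given only as an example):
   when widening a V8HI multiplication to V4SI results, the caller invokes
   this helper twice, once with CODE = VEC_WIDEN_MULT_LO_EXPR and once with
   CODE = VEC_WIDEN_MULT_HI_EXPR (or the EVEN/ODD variants, depending on
   what the target supports), so the low and high halves of the inputs each
   produce one V4SI vector.  When CODE is CALL_EXPR the same halves are
   produced by calling the target builtin DECL instead.  */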
2121
2122 static gimple
2123 vect_gen_widened_results_half (enum tree_code code,
2124 tree decl,
2125 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2126 tree vec_dest, gimple_stmt_iterator *gsi,
2127 gimple stmt)
2128 {
2129 gimple new_stmt;
2130 tree new_temp;
2131
2132 /* Generate half of the widened result: */
2133 if (code == CALL_EXPR)
2134 {
2135 /* Target specific support */
2136 if (op_type == binary_op)
2137 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2138 else
2139 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2140 new_temp = make_ssa_name (vec_dest, new_stmt);
2141 gimple_call_set_lhs (new_stmt, new_temp);
2142 }
2143 else
2144 {
2145 /* Generic support */
2146 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2147 if (op_type != binary_op)
2148 vec_oprnd1 = NULL;
2149 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2150 vec_oprnd1);
2151 new_temp = make_ssa_name (vec_dest, new_stmt);
2152 gimple_assign_set_lhs (new_stmt, new_temp);
2153 }
2154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2155
2156 return new_stmt;
2157 }
2158
2159
2160 /* Get vectorized definitions for loop-based vectorization. For the first
2161 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2162 the scalar operand), and for the rest we get a copy with
2163 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2164 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2165 The vectors are collected into VEC_OPRNDS. */
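/* Example of the counting (illustrative): the caller passes
   MULTI_STEP_CVT = vect_pow2 (multi_step_cvt) - 1, so a single-step
   narrowing (multi_step_cvt == 0) collects 2 vector defs -- the pair
   consumed by one VEC_PACK_TRUNC_EXPR -- while a two-step narrowing
   (multi_step_cvt == 1) recurses once more and collects 4 defs.  */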
2166
2167 static void
2168 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2169 vec<tree> *vec_oprnds, int multi_step_cvt)
2170 {
2171 tree vec_oprnd;
2172
2173 /* Get first vector operand. */
2174 /* All the vector operands except the very first one (that is, the scalar oprnd)
2175 are stmt copies. */
2176 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2177 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2178 else
2179 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2180
2181 vec_oprnds->quick_push (vec_oprnd);
2182
2183 /* Get second vector operand. */
2184 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2185 vec_oprnds->quick_push (vec_oprnd);
2186
2187 *oprnd = vec_oprnd;
2188
2189 /* For conversion in multiple steps, continue to get operands
2190 recursively. */
2191 if (multi_step_cvt)
2192 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2193 }
2194
2195
2196 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2197 For multi-step conversions, store the resulting vectors and call the function
2198 recursively. */
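/* Illustrative two-step demotion (the vector modes are assumptions): for an
   int -> char conversion with V4SI sources and a V16QI destination, the
   first level packs pairs of V4SI vectors into V8HI vectors, and the
   recursive call (always using VEC_PACK_TRUNC_EXPR) packs pairs of those
   into the final V16QI vectors.  */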
2199
2200 static void
2201 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2202 int multi_step_cvt, gimple stmt,
2203 vec<tree> vec_dsts,
2204 gimple_stmt_iterator *gsi,
2205 slp_tree slp_node, enum tree_code code,
2206 stmt_vec_info *prev_stmt_info)
2207 {
2208 unsigned int i;
2209 tree vop0, vop1, new_tmp, vec_dest;
2210 gimple new_stmt;
2211 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2212
2213 vec_dest = vec_dsts.pop ();
2214
2215 for (i = 0; i < vec_oprnds->length (); i += 2)
2216 {
2217 /* Create demotion operation. */
2218 vop0 = (*vec_oprnds)[i];
2219 vop1 = (*vec_oprnds)[i + 1];
2220 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2221 new_tmp = make_ssa_name (vec_dest, new_stmt);
2222 gimple_assign_set_lhs (new_stmt, new_tmp);
2223 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2224
2225 if (multi_step_cvt)
2226 /* Store the resulting vector for the next recursive call. */
2227 (*vec_oprnds)[i/2] = new_tmp;
2228 else
2229 {
2230 /* This is the last step of the conversion sequence. Store the
2231 vectors in SLP_NODE or in the vector info of the scalar statement
2232 (or in the STMT_VINFO_RELATED_STMT chain). */
2233 if (slp_node)
2234 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2235 else
2236 {
2237 if (!*prev_stmt_info)
2238 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2239 else
2240 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2241
2242 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2243 }
2244 }
2245 }
2246
2247 /* For multi-step demotion operations we first generate demotion operations
2248 from the source type to the intermediate types, and then combine the
2249 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2250 type. */
2251 if (multi_step_cvt)
2252 {
2253 /* At each level of recursion we have half of the operands we had at the
2254 previous level. */
2255 vec_oprnds->truncate ((i+1)/2);
2256 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2257 stmt, vec_dsts, gsi, slp_node,
2258 VEC_PACK_TRUNC_EXPR,
2259 prev_stmt_info);
2260 }
2261
2262 vec_dsts.quick_push (vec_dest);
2263 }
2264
2265
2266 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2267 and VEC_OPRNDS1 (for binary operations). For multi-step conversions, store
2268 the resulting vectors and call the function recursively. */
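/* Illustrative single-step promotion (modes and codes are assumptions): for
   a short -> int widening with V8HI inputs, each element of VEC_OPRNDS0
   yields two V4SI results, one produced with CODE1 = VEC_UNPACK_LO_EXPR and
   one with CODE2 = VEC_UNPACK_HI_EXPR (or via the target builtins
   DECL1/DECL2 when the codes are CALL_EXPR), so VEC_OPRNDS0 is replaced by
   a vector of twice as many defs of the wider type.  */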
2269
2270 static void
2271 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2272 vec<tree> *vec_oprnds1,
2273 gimple stmt, tree vec_dest,
2274 gimple_stmt_iterator *gsi,
2275 enum tree_code code1,
2276 enum tree_code code2, tree decl1,
2277 tree decl2, int op_type)
2278 {
2279 int i;
2280 tree vop0, vop1, new_tmp1, new_tmp2;
2281 gimple new_stmt1, new_stmt2;
2282 vec<tree> vec_tmp = vNULL;
2283
2284 vec_tmp.create (vec_oprnds0->length () * 2);
2285 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2286 {
2287 if (op_type == binary_op)
2288 vop1 = (*vec_oprnds1)[i];
2289 else
2290 vop1 = NULL_TREE;
2291
2292 /* Generate the two halves of the promotion operation. */
2293 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2294 op_type, vec_dest, gsi, stmt);
2295 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2296 op_type, vec_dest, gsi, stmt);
2297 if (is_gimple_call (new_stmt1))
2298 {
2299 new_tmp1 = gimple_call_lhs (new_stmt1);
2300 new_tmp2 = gimple_call_lhs (new_stmt2);
2301 }
2302 else
2303 {
2304 new_tmp1 = gimple_assign_lhs (new_stmt1);
2305 new_tmp2 = gimple_assign_lhs (new_stmt2);
2306 }
2307
2308 /* Store the results for the next step. */
2309 vec_tmp.quick_push (new_tmp1);
2310 vec_tmp.quick_push (new_tmp2);
2311 }
2312
2313 vec_oprnds0->release ();
2314 *vec_oprnds0 = vec_tmp;
2315 }
2316
2317
2318 /* Check if STMT performs a conversion operation that can be vectorized.
2319 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2320 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2321 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
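/* Rough map of the cases handled below (element counts are illustrative):
     NONE:   int <-> float with V4SI/V4SF; one conversion stmt per copy.
     WIDEN:  float -> double with V4SF -> V2DF; each input vector is split
             into a lo and a hi half, possibly through an intermediate
             integer type for conversions such as short -> double.
     NARROW: int -> short with V4SI -> V8HI; pairs of input vectors are
             packed into one output vector, again possibly in several
             steps.  */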
2322
2323 static bool
2324 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2325 gimple *vec_stmt, slp_tree slp_node)
2326 {
2327 tree vec_dest;
2328 tree scalar_dest;
2329 tree op0, op1 = NULL_TREE;
2330 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2332 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2333 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2334 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2335 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2336 tree new_temp;
2337 tree def;
2338 gimple def_stmt;
2339 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2340 gimple new_stmt = NULL;
2341 stmt_vec_info prev_stmt_info;
2342 int nunits_in;
2343 int nunits_out;
2344 tree vectype_out, vectype_in;
2345 int ncopies, i, j;
2346 tree lhs_type, rhs_type;
2347 enum { NARROW, NONE, WIDEN } modifier;
2348 vec<tree> vec_oprnds0 = vNULL;
2349 vec<tree> vec_oprnds1 = vNULL;
2350 tree vop0;
2351 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2352 int multi_step_cvt = 0;
2353 vec<tree> vec_dsts = vNULL;
2354 vec<tree> interm_types = vNULL;
2355 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2356 int op_type;
2357 enum machine_mode rhs_mode;
2358 unsigned short fltsz;
2359
2360 /* Is STMT a vectorizable conversion? */
2361
2362 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2363 return false;
2364
2365 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2366 return false;
2367
2368 if (!is_gimple_assign (stmt))
2369 return false;
2370
2371 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2372 return false;
2373
2374 code = gimple_assign_rhs_code (stmt);
2375 if (!CONVERT_EXPR_CODE_P (code)
2376 && code != FIX_TRUNC_EXPR
2377 && code != FLOAT_EXPR
2378 && code != WIDEN_MULT_EXPR
2379 && code != WIDEN_LSHIFT_EXPR)
2380 return false;
2381
2382 op_type = TREE_CODE_LENGTH (code);
2383
2384 /* Check types of lhs and rhs. */
2385 scalar_dest = gimple_assign_lhs (stmt);
2386 lhs_type = TREE_TYPE (scalar_dest);
2387 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2388
2389 op0 = gimple_assign_rhs1 (stmt);
2390 rhs_type = TREE_TYPE (op0);
2391
2392 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2393 && !((INTEGRAL_TYPE_P (lhs_type)
2394 && INTEGRAL_TYPE_P (rhs_type))
2395 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2396 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2397 return false;
2398
2399 if ((INTEGRAL_TYPE_P (lhs_type)
2400 && (TYPE_PRECISION (lhs_type)
2401 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2402 || (INTEGRAL_TYPE_P (rhs_type)
2403 && (TYPE_PRECISION (rhs_type)
2404 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2405 {
2406 if (dump_enabled_p ())
2407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2408 "type conversion to/from bit-precision unsupported."
2409 "\n");
2410 return false;
2411 }
2412
2413 /* Check the operands of the operation. */
2414 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2415 &def_stmt, &def, &dt[0], &vectype_in))
2416 {
2417 if (dump_enabled_p ())
2418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2419 "use not simple.\n");
2420 return false;
2421 }
2422 if (op_type == binary_op)
2423 {
2424 bool ok;
2425
2426 op1 = gimple_assign_rhs2 (stmt);
2427 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2428 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2429 OP1. */
2430 if (CONSTANT_CLASS_P (op0))
2431 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2432 &def_stmt, &def, &dt[1], &vectype_in);
2433 else
2434 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2435 &def, &dt[1]);
2436
2437 if (!ok)
2438 {
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "use not simple.\n");
2442 return false;
2443 }
2444 }
2445
2446 /* If op0 is an external or constant def, use a vector type of
2447 the same size as the output vector type. */
2448 if (!vectype_in)
2449 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2450 if (vec_stmt)
2451 gcc_assert (vectype_in);
2452 if (!vectype_in)
2453 {
2454 if (dump_enabled_p ())
2455 {
2456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2457 "no vectype for scalar type ");
2458 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2459 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2460 }
2461
2462 return false;
2463 }
2464
2465 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2466 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2467 if (nunits_in < nunits_out)
2468 modifier = NARROW;
2469 else if (nunits_out == nunits_in)
2470 modifier = NONE;
2471 else
2472 modifier = WIDEN;
2473
2474 /* Multiple types in SLP are handled by creating the appropriate number of
2475 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2476 case of SLP. */
2477 if (slp_node || PURE_SLP_STMT (stmt_info))
2478 ncopies = 1;
2479 else if (modifier == NARROW)
2480 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2481 else
2482 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2483
2484 /* Sanity check: make sure that at least one copy of the vectorized stmt
2485 needs to be generated. */
2486 gcc_assert (ncopies >= 1);
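  /* Worked example (the numbers are illustrative): with a vectorization
     factor of 8 and a double -> float conversion using V2DF inputs and
     V4SF outputs, nunits_in = 2 < nunits_out = 4 gives modifier = NARROW
     and ncopies = 8 / 4 = 2; for NONE and WIDEN the divisor is nunits_in
     instead.  */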
2487
2488 /* Supportable by target? */
2489 switch (modifier)
2490 {
2491 case NONE:
2492 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2493 return false;
2494 if (supportable_convert_operation (code, vectype_out, vectype_in,
2495 &decl1, &code1))
2496 break;
2497 /* FALLTHRU */
2498 unsupported:
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "conversion not supported by target.\n");
2502 return false;
2503
2504 case WIDEN:
2505 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2506 &code1, &code2, &multi_step_cvt,
2507 &interm_types))
2508 {
2509 /* A binary widening operation can only be supported directly by the
2510 architecture. */
2511 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2512 break;
2513 }
2514
2515 if (code != FLOAT_EXPR
2516 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2517 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2518 goto unsupported;
2519
2520 rhs_mode = TYPE_MODE (rhs_type);
2521 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2522 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2523 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2524 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2525 {
2526 cvt_type
2527 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2528 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2529 if (cvt_type == NULL_TREE)
2530 goto unsupported;
2531
2532 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2533 {
2534 if (!supportable_convert_operation (code, vectype_out,
2535 cvt_type, &decl1, &codecvt1))
2536 goto unsupported;
2537 }
2538 else if (!supportable_widening_operation (code, stmt, vectype_out,
2539 cvt_type, &codecvt1,
2540 &codecvt2, &multi_step_cvt,
2541 &interm_types))
2542 continue;
2543 else
2544 gcc_assert (multi_step_cvt == 0);
2545
2546 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2547 vectype_in, &code1, &code2,
2548 &multi_step_cvt, &interm_types))
2549 break;
2550 }
2551
2552 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2553 goto unsupported;
2554
2555 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2556 codecvt2 = ERROR_MARK;
2557 else
2558 {
2559 multi_step_cvt++;
2560 interm_types.safe_push (cvt_type);
2561 cvt_type = NULL_TREE;
2562 }
2563 break;
2564
2565 case NARROW:
2566 gcc_assert (op_type == unary_op);
2567 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2568 &code1, &multi_step_cvt,
2569 &interm_types))
2570 break;
2571
2572 if (code != FIX_TRUNC_EXPR
2573 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2574 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2575 goto unsupported;
2576
2577 rhs_mode = TYPE_MODE (rhs_type);
2578 cvt_type
2579 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2580 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2581 if (cvt_type == NULL_TREE)
2582 goto unsupported;
2583 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2584 &decl1, &codecvt1))
2585 goto unsupported;
2586 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2587 &code1, &multi_step_cvt,
2588 &interm_types))
2589 break;
2590 goto unsupported;
2591
2592 default:
2593 gcc_unreachable ();
2594 }
2595
2596 if (!vec_stmt) /* transformation not required. */
2597 {
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_NOTE, vect_location,
2600 "=== vectorizable_conversion ===\n");
2601 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2602 {
2603 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2604 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2605 }
2606 else if (modifier == NARROW)
2607 {
2608 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2609 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2610 }
2611 else
2612 {
2613 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2614 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2615 }
2616 interm_types.release ();
2617 return true;
2618 }
2619
2620 /** Transform. **/
2621 if (dump_enabled_p ())
2622 dump_printf_loc (MSG_NOTE, vect_location,
2623 "transform conversion. ncopies = %d.\n", ncopies);
2624
2625 if (op_type == binary_op)
2626 {
2627 if (CONSTANT_CLASS_P (op0))
2628 op0 = fold_convert (TREE_TYPE (op1), op0);
2629 else if (CONSTANT_CLASS_P (op1))
2630 op1 = fold_convert (TREE_TYPE (op0), op1);
2631 }
2632
2633 /* In case of multi-step conversion, we first generate conversion operations
2634 to the intermediate types, and then from those types to the final one.
2635 We create vector destinations for the intermediate types (TYPES) received
2636 from supportable_*_operation, and store them in the correct order
2637 for future use in vect_create_vectorized_*_stmts (). */
2638 vec_dsts.create (multi_step_cvt + 1);
2639 vec_dest = vect_create_destination_var (scalar_dest,
2640 (cvt_type && modifier == WIDEN)
2641 ? cvt_type : vectype_out);
2642 vec_dsts.quick_push (vec_dest);
2643
2644 if (multi_step_cvt)
2645 {
2646 for (i = interm_types.length () - 1;
2647 interm_types.iterate (i, &intermediate_type); i--)
2648 {
2649 vec_dest = vect_create_destination_var (scalar_dest,
2650 intermediate_type);
2651 vec_dsts.quick_push (vec_dest);
2652 }
2653 }
2654
2655 if (cvt_type)
2656 vec_dest = vect_create_destination_var (scalar_dest,
2657 modifier == WIDEN
2658 ? vectype_out : cvt_type);
2659
2660 if (!slp_node)
2661 {
2662 if (modifier == WIDEN)
2663 {
2664 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
2665 if (op_type == binary_op)
2666 vec_oprnds1.create (1);
2667 }
2668 else if (modifier == NARROW)
2669 vec_oprnds0.create (
2670 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2671 }
2672 else if (code == WIDEN_LSHIFT_EXPR)
2673 vec_oprnds1.create (slp_node->vec_stmts_size);
2674
2675 last_oprnd = op0;
2676 prev_stmt_info = NULL;
2677 switch (modifier)
2678 {
2679 case NONE:
2680 for (j = 0; j < ncopies; j++)
2681 {
2682 if (j == 0)
2683 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2684 -1);
2685 else
2686 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2687
2688 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2689 {
2690 /* Arguments are ready. Create the new vector stmt. */
2691 if (code1 == CALL_EXPR)
2692 {
2693 new_stmt = gimple_build_call (decl1, 1, vop0);
2694 new_temp = make_ssa_name (vec_dest, new_stmt);
2695 gimple_call_set_lhs (new_stmt, new_temp);
2696 }
2697 else
2698 {
2699 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2700 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2701 vop0, NULL);
2702 new_temp = make_ssa_name (vec_dest, new_stmt);
2703 gimple_assign_set_lhs (new_stmt, new_temp);
2704 }
2705
2706 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2707 if (slp_node)
2708 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2709 }
2710
2711 if (j == 0)
2712 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2713 else
2714 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2715 prev_stmt_info = vinfo_for_stmt (new_stmt);
2716 }
2717 break;
2718
2719 case WIDEN:
2720 /* In case the vectorization factor (VF) is bigger than the number
2721 of elements that we can fit in a vectype (nunits), we have to
2722 generate more than one vector stmt -- i.e., we need to "unroll"
2723 the vector stmt by a factor VF/nunits. */
2724 for (j = 0; j < ncopies; j++)
2725 {
2726 /* Handle uses. */
2727 if (j == 0)
2728 {
2729 if (slp_node)
2730 {
2731 if (code == WIDEN_LSHIFT_EXPR)
2732 {
2733 unsigned int k;
2734
2735 vec_oprnd1 = op1;
2736 /* Store vec_oprnd1 for every vector stmt to be created
2737 for SLP_NODE. We check during the analysis that all
2738 the shift arguments are the same. */
2739 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2740 vec_oprnds1.quick_push (vec_oprnd1);
2741
2742 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2743 slp_node, -1);
2744 }
2745 else
2746 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2747 &vec_oprnds1, slp_node, -1);
2748 }
2749 else
2750 {
2751 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2752 vec_oprnds0.quick_push (vec_oprnd0);
2753 if (op_type == binary_op)
2754 {
2755 if (code == WIDEN_LSHIFT_EXPR)
2756 vec_oprnd1 = op1;
2757 else
2758 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2759 NULL);
2760 vec_oprnds1.quick_push (vec_oprnd1);
2761 }
2762 }
2763 }
2764 else
2765 {
2766 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2767 vec_oprnds0.truncate (0);
2768 vec_oprnds0.quick_push (vec_oprnd0);
2769 if (op_type == binary_op)
2770 {
2771 if (code == WIDEN_LSHIFT_EXPR)
2772 vec_oprnd1 = op1;
2773 else
2774 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2775 vec_oprnd1);
2776 vec_oprnds1.truncate (0);
2777 vec_oprnds1.quick_push (vec_oprnd1);
2778 }
2779 }
2780
2781 /* Arguments are ready. Create the new vector stmts. */
2782 for (i = multi_step_cvt; i >= 0; i--)
2783 {
2784 tree this_dest = vec_dsts[i];
2785 enum tree_code c1 = code1, c2 = code2;
2786 if (i == 0 && codecvt2 != ERROR_MARK)
2787 {
2788 c1 = codecvt1;
2789 c2 = codecvt2;
2790 }
2791 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2792 &vec_oprnds1,
2793 stmt, this_dest, gsi,
2794 c1, c2, decl1, decl2,
2795 op_type);
2796 }
2797
2798 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2799 {
2800 if (cvt_type)
2801 {
2802 if (codecvt1 == CALL_EXPR)
2803 {
2804 new_stmt = gimple_build_call (decl1, 1, vop0);
2805 new_temp = make_ssa_name (vec_dest, new_stmt);
2806 gimple_call_set_lhs (new_stmt, new_temp);
2807 }
2808 else
2809 {
2810 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2811 new_temp = make_ssa_name (vec_dest, NULL);
2812 new_stmt = gimple_build_assign_with_ops (codecvt1,
2813 new_temp,
2814 vop0, NULL);
2815 }
2816
2817 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2818 }
2819 else
2820 new_stmt = SSA_NAME_DEF_STMT (vop0);
2821
2822 if (slp_node)
2823 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2824 else
2825 {
2826 if (!prev_stmt_info)
2827 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2828 else
2829 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2830 prev_stmt_info = vinfo_for_stmt (new_stmt);
2831 }
2832 }
2833 }
2834
2835 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2836 break;
2837
2838 case NARROW:
2839 /* In case the vectorization factor (VF) is bigger than the number
2840 of elements that we can fit in a vectype (nunits), we have to
2841 generate more than one vector stmt -- i.e., we need to "unroll"
2842 the vector stmt by a factor VF/nunits. */
2843 for (j = 0; j < ncopies; j++)
2844 {
2845 /* Handle uses. */
2846 if (slp_node)
2847 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2848 slp_node, -1);
2849 else
2850 {
2851 vec_oprnds0.truncate (0);
2852 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2853 vect_pow2 (multi_step_cvt) - 1);
2854 }
2855
2856 /* Arguments are ready. Create the new vector stmts. */
2857 if (cvt_type)
2858 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2859 {
2860 if (codecvt1 == CALL_EXPR)
2861 {
2862 new_stmt = gimple_build_call (decl1, 1, vop0);
2863 new_temp = make_ssa_name (vec_dest, new_stmt);
2864 gimple_call_set_lhs (new_stmt, new_temp);
2865 }
2866 else
2867 {
2868 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2869 new_temp = make_ssa_name (vec_dest, NULL);
2870 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2871 vop0, NULL);
2872 }
2873
2874 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2875 vec_oprnds0[i] = new_temp;
2876 }
2877
2878 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2879 stmt, vec_dsts, gsi,
2880 slp_node, code1,
2881 &prev_stmt_info);
2882 }
2883
2884 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2885 break;
2886 }
2887
2888 vec_oprnds0.release ();
2889 vec_oprnds1.release ();
2890 vec_dsts.release ();
2891 interm_types.release ();
2892
2893 return true;
2894 }
2895
2896
2897 /* Function vectorizable_assignment.
2898
2899 Check if STMT performs an assignment (copy) that can be vectorized.
2900 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2901 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2902 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
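/* Illustrative case (the types are assumptions): an assignment such as
       b_3 = (unsigned int) a_2;
   changes neither the number of vector elements nor the vector size, so it
   is vectorized as a plain vector copy; for conversions the source vector
   is wrapped in a VIEW_CONVERT_EXPR to the destination vector type, as
   done in the transformation loop below.  */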
2903
2904 static bool
2905 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2906 gimple *vec_stmt, slp_tree slp_node)
2907 {
2908 tree vec_dest;
2909 tree scalar_dest;
2910 tree op;
2911 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2912 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2913 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2914 tree new_temp;
2915 tree def;
2916 gimple def_stmt;
2917 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2918 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2919 int ncopies;
2920 int i, j;
2921 vec<tree> vec_oprnds = vNULL;
2922 tree vop;
2923 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2924 gimple new_stmt = NULL;
2925 stmt_vec_info prev_stmt_info = NULL;
2926 enum tree_code code;
2927 tree vectype_in;
2928
2929 /* Multiple types in SLP are handled by creating the appropriate number of
2930 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2931 case of SLP. */
2932 if (slp_node || PURE_SLP_STMT (stmt_info))
2933 ncopies = 1;
2934 else
2935 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2936
2937 gcc_assert (ncopies >= 1);
2938
2939 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2940 return false;
2941
2942 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2943 return false;
2944
2945 /* Is vectorizable assignment? */
2946 if (!is_gimple_assign (stmt))
2947 return false;
2948
2949 scalar_dest = gimple_assign_lhs (stmt);
2950 if (TREE_CODE (scalar_dest) != SSA_NAME)
2951 return false;
2952
2953 code = gimple_assign_rhs_code (stmt);
2954 if (gimple_assign_single_p (stmt)
2955 || code == PAREN_EXPR
2956 || CONVERT_EXPR_CODE_P (code))
2957 op = gimple_assign_rhs1 (stmt);
2958 else
2959 return false;
2960
2961 if (code == VIEW_CONVERT_EXPR)
2962 op = TREE_OPERAND (op, 0);
2963
2964 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2965 &def_stmt, &def, &dt[0], &vectype_in))
2966 {
2967 if (dump_enabled_p ())
2968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2969 "use not simple.\n");
2970 return false;
2971 }
2972
2973 /* We can handle NOP_EXPR conversions that do not change the number
2974 of elements or the vector size. */
2975 if ((CONVERT_EXPR_CODE_P (code)
2976 || code == VIEW_CONVERT_EXPR)
2977 && (!vectype_in
2978 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2979 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2980 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2981 return false;
2982
2983 /* We do not handle bit-precision changes. */
2984 if ((CONVERT_EXPR_CODE_P (code)
2985 || code == VIEW_CONVERT_EXPR)
2986 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2987 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2988 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2989 || ((TYPE_PRECISION (TREE_TYPE (op))
2990 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2991 /* But a conversion that does not change the bit-pattern is ok. */
2992 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2993 > TYPE_PRECISION (TREE_TYPE (op)))
2994 && TYPE_UNSIGNED (TREE_TYPE (op))))
2995 {
2996 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2998 "type conversion to/from bit-precision "
2999 "unsupported.\n");
3000 return false;
3001 }
3002
3003 if (!vec_stmt) /* transformation not required. */
3004 {
3005 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3006 if (dump_enabled_p ())
3007 dump_printf_loc (MSG_NOTE, vect_location,
3008 "=== vectorizable_assignment ===\n");
3009 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3010 return true;
3011 }
3012
3013 /** Transform. **/
3014 if (dump_enabled_p ())
3015 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
3016
3017 /* Handle def. */
3018 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3019
3020 /* Handle use. */
3021 for (j = 0; j < ncopies; j++)
3022 {
3023 /* Handle uses. */
3024 if (j == 0)
3025 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
3026 else
3027 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3028
3029 /* Arguments are ready. Create the new vector stmt. */
3030 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3031 {
3032 if (CONVERT_EXPR_CODE_P (code)
3033 || code == VIEW_CONVERT_EXPR)
3034 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
3035 new_stmt = gimple_build_assign (vec_dest, vop);
3036 new_temp = make_ssa_name (vec_dest, new_stmt);
3037 gimple_assign_set_lhs (new_stmt, new_temp);
3038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3039 if (slp_node)
3040 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3041 }
3042
3043 if (slp_node)
3044 continue;
3045
3046 if (j == 0)
3047 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3048 else
3049 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3050
3051 prev_stmt_info = vinfo_for_stmt (new_stmt);
3052 }
3053
3054 vec_oprnds.release ();
3055 return true;
3056 }
3057
3058
3059 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3060 either as shift by a scalar or by a vector. */
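/* Sketch of intended use (the call site shown is hypothetical): pattern
   recognition code can ask, e.g.,
       vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0))
   before rewriting a division by a power of two into a shift; a TRUE
   answer only promises that some form of the shift (by a scalar or by a
   vector amount) exists for the vector type of SCALAR_TYPE.  */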
3061
3062 bool
3063 vect_supportable_shift (enum tree_code code, tree scalar_type)
3064 {
3065
3066 enum machine_mode vec_mode;
3067 optab optab;
3068 int icode;
3069 tree vectype;
3070
3071 vectype = get_vectype_for_scalar_type (scalar_type);
3072 if (!vectype)
3073 return false;
3074
3075 optab = optab_for_tree_code (code, vectype, optab_scalar);
3076 if (!optab
3077 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3078 {
3079 optab = optab_for_tree_code (code, vectype, optab_vector);
3080 if (!optab
3081 || (optab_handler (optab, TYPE_MODE (vectype))
3082 == CODE_FOR_nothing))
3083 return false;
3084 }
3085
3086 vec_mode = TYPE_MODE (vectype);
3087 icode = (int) optab_handler (optab, vec_mode);
3088 if (icode == CODE_FOR_nothing)
3089 return false;
3090
3091 return true;
3092 }
3093
3094
3095 /* Function vectorizable_shift.
3096
3097 Check if STMT performs a shift operation that can be vectorized.
3098 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3099 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3100 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
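/* Two illustrative shapes of STMT (the shift amounts are assumptions):
       a_5 = b_4 << 3;      <-- invariant amount, vector/scalar shift
       a_5 = b_4 << c_6;    <-- variable amount, vector/vector shift
   The analysis below decides which optab (optab_scalar or optab_vector) to
   use, falling back from the scalar form to the vector form when the
   target only provides the latter.  */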
3101
3102 static bool
3103 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3104 gimple *vec_stmt, slp_tree slp_node)
3105 {
3106 tree vec_dest;
3107 tree scalar_dest;
3108 tree op0, op1 = NULL;
3109 tree vec_oprnd1 = NULL_TREE;
3110 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3111 tree vectype;
3112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3113 enum tree_code code;
3114 enum machine_mode vec_mode;
3115 tree new_temp;
3116 optab optab;
3117 int icode;
3118 enum machine_mode optab_op2_mode;
3119 tree def;
3120 gimple def_stmt;
3121 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3122 gimple new_stmt = NULL;
3123 stmt_vec_info prev_stmt_info;
3124 int nunits_in;
3125 int nunits_out;
3126 tree vectype_out;
3127 tree op1_vectype;
3128 int ncopies;
3129 int j, i;
3130 vec<tree> vec_oprnds0 = vNULL;
3131 vec<tree> vec_oprnds1 = vNULL;
3132 tree vop0, vop1;
3133 unsigned int k;
3134 bool scalar_shift_arg = true;
3135 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3136 int vf;
3137
3138 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3139 return false;
3140
3141 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3142 return false;
3143
3144 /* Is STMT a vectorizable binary/unary operation? */
3145 if (!is_gimple_assign (stmt))
3146 return false;
3147
3148 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3149 return false;
3150
3151 code = gimple_assign_rhs_code (stmt);
3152
3153 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3154 || code == RROTATE_EXPR))
3155 return false;
3156
3157 scalar_dest = gimple_assign_lhs (stmt);
3158 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3159 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3160 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3161 {
3162 if (dump_enabled_p ())
3163 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3164 "bit-precision shifts not supported.\n");
3165 return false;
3166 }
3167
3168 op0 = gimple_assign_rhs1 (stmt);
3169 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3170 &def_stmt, &def, &dt[0], &vectype))
3171 {
3172 if (dump_enabled_p ())
3173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3174 "use not simple.\n");
3175 return false;
3176 }
3177 /* If op0 is an external or constant def, use a vector type with
3178 the same size as the output vector type. */
3179 if (!vectype)
3180 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3181 if (vec_stmt)
3182 gcc_assert (vectype);
3183 if (!vectype)
3184 {
3185 if (dump_enabled_p ())
3186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3187 "no vectype for scalar type\n");
3188 return false;
3189 }
3190
3191 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3192 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3193 if (nunits_out != nunits_in)
3194 return false;
3195
3196 op1 = gimple_assign_rhs2 (stmt);
3197 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3198 &def, &dt[1], &op1_vectype))
3199 {
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3202 "use not simple.\n");
3203 return false;
3204 }
3205
3206 if (loop_vinfo)
3207 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3208 else
3209 vf = 1;
3210
3211 /* Multiple types in SLP are handled by creating the appropriate number of
3212 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3213 case of SLP. */
3214 if (slp_node || PURE_SLP_STMT (stmt_info))
3215 ncopies = 1;
3216 else
3217 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3218
3219 gcc_assert (ncopies >= 1);
3220
3221 /* Determine whether the shift amount is a vector or a scalar. If the
3222 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3223
3224 if (dt[1] == vect_internal_def && !slp_node)
3225 scalar_shift_arg = false;
3226 else if (dt[1] == vect_constant_def
3227 || dt[1] == vect_external_def
3228 || dt[1] == vect_internal_def)
3229 {
3230 /* In SLP we need to check whether the shift count is the same for all
3231 statements; in loops, if it is a constant or invariant, it is always
3232 a scalar shift. */
3233 if (slp_node)
3234 {
3235 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3236 gimple slpstmt;
3237
3238 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3239 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3240 scalar_shift_arg = false;
3241 }
3242 }
3243 else
3244 {
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "operand mode requires invariant argument.\n");
3248 return false;
3249 }
3250
3251 /* Vector shifted by vector. */
3252 if (!scalar_shift_arg)
3253 {
3254 optab = optab_for_tree_code (code, vectype, optab_vector);
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_NOTE, vect_location,
3257 "vector/vector shift/rotate found.\n");
3258
3259 if (!op1_vectype)
3260 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3261 if (op1_vectype == NULL_TREE
3262 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3263 {
3264 if (dump_enabled_p ())
3265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3266 "unusable type for last operand in"
3267 " vector/vector shift/rotate.\n");
3268 return false;
3269 }
3270 }
3271 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
3272 then see if it has a vector-shifted-by-vector insn. */
3273 else
3274 {
3275 optab = optab_for_tree_code (code, vectype, optab_scalar);
3276 if (optab
3277 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3278 {
3279 if (dump_enabled_p ())
3280 dump_printf_loc (MSG_NOTE, vect_location,
3281 "vector/scalar shift/rotate found.\n");
3282 }
3283 else
3284 {
3285 optab = optab_for_tree_code (code, vectype, optab_vector);
3286 if (optab
3287 && (optab_handler (optab, TYPE_MODE (vectype))
3288 != CODE_FOR_nothing))
3289 {
3290 scalar_shift_arg = false;
3291
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_NOTE, vect_location,
3294 "vector/vector shift/rotate found.\n");
3295
3296 /* Unlike the other binary operators, shifts/rotates have
3297 an int rhs instead of one of the same type as the lhs,
3298 so make sure the scalar is the right type if we are
3299 dealing with vectors of long long/long/short/char. */
3300 if (dt[1] == vect_constant_def)
3301 op1 = fold_convert (TREE_TYPE (vectype), op1);
3302 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3303 TREE_TYPE (op1)))
3304 {
3305 if (slp_node
3306 && TYPE_MODE (TREE_TYPE (vectype))
3307 != TYPE_MODE (TREE_TYPE (op1)))
3308 {
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3311 "unusable type for last operand in"
3312 " vector/vector shift/rotate.\n");
3313 return false;
3314 }
3315 if (vec_stmt && !slp_node)
3316 {
3317 op1 = fold_convert (TREE_TYPE (vectype), op1);
3318 op1 = vect_init_vector (stmt, op1,
3319 TREE_TYPE (vectype), NULL);
3320 }
3321 }
3322 }
3323 }
3324 }
3325
3326 /* Supportable by target? */
3327 if (!optab)
3328 {
3329 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3331 "no optab.\n");
3332 return false;
3333 }
3334 vec_mode = TYPE_MODE (vectype);
3335 icode = (int) optab_handler (optab, vec_mode);
3336 if (icode == CODE_FOR_nothing)
3337 {
3338 if (dump_enabled_p ())
3339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3340 "op not supported by target.\n");
3341 /* Check only during analysis. */
3342 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3343 || (vf < vect_min_worthwhile_factor (code)
3344 && !vec_stmt))
3345 return false;
3346 if (dump_enabled_p ())
3347 dump_printf_loc (MSG_NOTE, vect_location,
3348 "proceeding using word mode.\n");
3349 }
3350
3351 /* Worthwhile without SIMD support? Check only during analysis. */
3352 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3353 && vf < vect_min_worthwhile_factor (code)
3354 && !vec_stmt)
3355 {
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "not worthwhile without SIMD support.\n");
3359 return false;
3360 }
3361
3362 if (!vec_stmt) /* transformation not required. */
3363 {
3364 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_NOTE, vect_location,
3367 "=== vectorizable_shift ===\n");
3368 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3369 return true;
3370 }
3371
3372 /** Transform. **/
3373
3374 if (dump_enabled_p ())
3375 dump_printf_loc (MSG_NOTE, vect_location,
3376 "transform binary/unary operation.\n");
3377
3378 /* Handle def. */
3379 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3380
3381 prev_stmt_info = NULL;
3382 for (j = 0; j < ncopies; j++)
3383 {
3384 /* Handle uses. */
3385 if (j == 0)
3386 {
3387 if (scalar_shift_arg)
3388 {
3389 /* Vector shl and shr insn patterns can be defined with scalar
3390 operand 2 (shift operand). In this case, use constant or loop
3391 invariant op1 directly, without extending it to vector mode
3392 first. */
3393 optab_op2_mode = insn_data[icode].operand[2].mode;
3394 if (!VECTOR_MODE_P (optab_op2_mode))
3395 {
3396 if (dump_enabled_p ())
3397 dump_printf_loc (MSG_NOTE, vect_location,
3398 "operand 1 using scalar mode.\n");
3399 vec_oprnd1 = op1;
3400 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3401 vec_oprnds1.quick_push (vec_oprnd1);
3402 if (slp_node)
3403 {
3404 /* Store vec_oprnd1 for every vector stmt to be created
3405 for SLP_NODE. We check during the analysis that all
3406 the shift arguments are the same.
3407 TODO: Allow different constants for different vector
3408 stmts generated for an SLP instance. */
3409 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3410 vec_oprnds1.quick_push (vec_oprnd1);
3411 }
3412 }
3413 }
3414
3415 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3416 (a special case for certain kinds of vector shifts); otherwise,
3417 operand 1 should be of a vector type (the usual case). */
3418 if (vec_oprnd1)
3419 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3420 slp_node, -1);
3421 else
3422 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3423 slp_node, -1);
3424 }
3425 else
3426 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3427
3428 /* Arguments are ready. Create the new vector stmt. */
3429 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3430 {
3431 vop1 = vec_oprnds1[i];
3432 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3433 new_temp = make_ssa_name (vec_dest, new_stmt);
3434 gimple_assign_set_lhs (new_stmt, new_temp);
3435 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3436 if (slp_node)
3437 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3438 }
3439
3440 if (slp_node)
3441 continue;
3442
3443 if (j == 0)
3444 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3445 else
3446 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3447 prev_stmt_info = vinfo_for_stmt (new_stmt);
3448 }
3449
3450 vec_oprnds0.release ();
3451 vec_oprnds1.release ();
3452
3453 return true;
3454 }
3455
3456
3457 static tree permute_vec_elements (tree, tree, tree, gimple,
3458 gimple_stmt_iterator *);
3459
3460
3461 /* Function vectorizable_operation.
3462
3463 Check if STMT performs a binary, unary or ternary operation that can
3464 be vectorized.
3465 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3466 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3467 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
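/* Illustrative input (element type and vector mode are assumptions): for
       a_6 = b_4 + c_5;
   with int elements and V4SI vectors, the transformation below emits one
       vect_a = vect_b + vect_c;
   per copy; a ternary operation (FMA_EXPR, for example) additionally
   supplies rhs3 and uses dt[2].  */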
3468
3469 static bool
3470 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3471 gimple *vec_stmt, slp_tree slp_node)
3472 {
3473 tree vec_dest;
3474 tree scalar_dest;
3475 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3477 tree vectype;
3478 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3479 enum tree_code code;
3480 enum machine_mode vec_mode;
3481 tree new_temp;
3482 int op_type;
3483 optab optab;
3484 int icode;
3485 tree def;
3486 gimple def_stmt;
3487 enum vect_def_type dt[3]
3488 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3489 gimple new_stmt = NULL;
3490 stmt_vec_info prev_stmt_info;
3491 int nunits_in;
3492 int nunits_out;
3493 tree vectype_out;
3494 int ncopies;
3495 int j, i;
3496 vec<tree> vec_oprnds0 = vNULL;
3497 vec<tree> vec_oprnds1 = vNULL;
3498 vec<tree> vec_oprnds2 = vNULL;
3499 tree vop0, vop1, vop2;
3500 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3501 int vf;
3502
3503 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3504 return false;
3505
3506 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3507 return false;
3508
3509 /* Is STMT a vectorizable binary/unary operation? */
3510 if (!is_gimple_assign (stmt))
3511 return false;
3512
3513 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3514 return false;
3515
3516 code = gimple_assign_rhs_code (stmt);
3517
3518 /* For pointer addition, we should use the normal plus for
3519 the vector addition. */
3520 if (code == POINTER_PLUS_EXPR)
3521 code = PLUS_EXPR;
3522
3523 /* Support only unary, binary, or ternary operations. */
3524 op_type = TREE_CODE_LENGTH (code);
3525 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3526 {
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3529 "num. args = %d (not unary/binary/ternary op).\n",
3530 op_type);
3531 return false;
3532 }
3533
3534 scalar_dest = gimple_assign_lhs (stmt);
3535 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3536
3537 /* Most operations cannot handle bit-precision types without extra
3538 truncations. */
3539 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3540 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3541 /* Exceptions are bitwise binary operations. */
3542 && code != BIT_IOR_EXPR
3543 && code != BIT_XOR_EXPR
3544 && code != BIT_AND_EXPR)
3545 {
3546 if (dump_enabled_p ())
3547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3548 "bit-precision arithmetic not supported.\n");
3549 return false;
3550 }
3551
3552 op0 = gimple_assign_rhs1 (stmt);
3553 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3554 &def_stmt, &def, &dt[0], &vectype))
3555 {
3556 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "use not simple.\n");
3559 return false;
3560 }
3561 /* If op0 is an external or constant def, use a vector type with
3562 the same size as the output vector type. */
3563 if (!vectype)
3564 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3565 if (vec_stmt)
3566 gcc_assert (vectype);
3567 if (!vectype)
3568 {
3569 if (dump_enabled_p ())
3570 {
3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3572 "no vectype for scalar type ");
3573 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3574 TREE_TYPE (op0));
3575 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3576 }
3577
3578 return false;
3579 }
3580
3581 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3582 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3583 if (nunits_out != nunits_in)
3584 return false;
3585
3586 if (op_type == binary_op || op_type == ternary_op)
3587 {
3588 op1 = gimple_assign_rhs2 (stmt);
3589 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3590 &def, &dt[1]))
3591 {
3592 if (dump_enabled_p ())
3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594 "use not simple.\n");
3595 return false;
3596 }
3597 }
3598 if (op_type == ternary_op)
3599 {
3600 op2 = gimple_assign_rhs3 (stmt);
3601 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3602 &def, &dt[2]))
3603 {
3604 if (dump_enabled_p ())
3605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3606 "use not simple.\n");
3607 return false;
3608 }
3609 }
3610
3611 if (loop_vinfo)
3612 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3613 else
3614 vf = 1;
3615
3616 /* Multiple types in SLP are handled by creating the appropriate number of
3617 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3618 case of SLP. */
3619 if (slp_node || PURE_SLP_STMT (stmt_info))
3620 ncopies = 1;
3621 else
3622 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3623
3624 gcc_assert (ncopies >= 1);
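/* Worked example (illustration only): with a vectorization factor of 8
   and a vector type holding 4 elements (nunits_in == 4), ncopies == 2,
   i.e. two copies of the vector stmt are generated per scalar stmt; in
   the SLP case the unrolling is instead expressed by the number of
   vector stmts per SLP node, so ncopies stays 1.  */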
3625
3626 /* Shifts are handled in vectorizable_shift (). */
3627 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3628 || code == RROTATE_EXPR)
3629 return false;
3630
3631 /* Supportable by target? */
3632
3633 vec_mode = TYPE_MODE (vectype);
3634 if (code == MULT_HIGHPART_EXPR)
3635 {
3636 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3637 icode = LAST_INSN_CODE;
3638 else
3639 icode = CODE_FOR_nothing;
3640 }
3641 else
3642 {
3643 optab = optab_for_tree_code (code, vectype, optab_default);
3644 if (!optab)
3645 {
3646 if (dump_enabled_p ())
3647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3648 "no optab.\n");
3649 return false;
3650 }
3651 icode = (int) optab_handler (optab, vec_mode);
3652 }
3653
3654 if (icode == CODE_FOR_nothing)
3655 {
3656 if (dump_enabled_p ())
3657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3658 "op not supported by target.\n");
3659 /* Check only during analysis. */
3660 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3661 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3662 return false;
3663 if (dump_enabled_p ())
3664 dump_printf_loc (MSG_NOTE, vect_location,
3665 "proceeding using word mode.\n");
3666 }
3667
3668 /* Worthwhile without SIMD support? Check only during analysis. */
3669 if (!VECTOR_MODE_P (vec_mode)
3670 && !vec_stmt
3671 && vf < vect_min_worthwhile_factor (code))
3672 {
3673 if (dump_enabled_p ())
3674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3675 "not worthwhile without SIMD support.\n");
3676 return false;
3677 }
3678
3679 if (!vec_stmt) /* transformation not required. */
3680 {
3681 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3682 if (dump_enabled_p ())
3683 dump_printf_loc (MSG_NOTE, vect_location,
3684 "=== vectorizable_operation ===\n");
3685 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3686 return true;
3687 }
3688
3689 /** Transform. **/
3690
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_NOTE, vect_location,
3693 "transform binary/unary operation.\n");
3694
3695 /* Handle def. */
3696 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3697
3698 /* In case the vectorization factor (VF) is bigger than the number
3699 of elements that we can fit in a vectype (nunits), we have to generate
3700 more than one vector stmt - i.e - we need to "unroll" the
3701 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3702 from one copy of the vector stmt to the next, in the field
3703 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3704 stages to find the correct vector defs to be used when vectorizing
3705 stmts that use the defs of the current stmt. The example below
3706 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3707 we need to create 4 vectorized stmts):
3708
3709 before vectorization:
3710 RELATED_STMT VEC_STMT
3711 S1: x = memref - -
3712 S2: z = x + 1 - -
3713
3714 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3715 there):
3716 RELATED_STMT VEC_STMT
3717 VS1_0: vx0 = memref0 VS1_1 -
3718 VS1_1: vx1 = memref1 VS1_2 -
3719 VS1_2: vx2 = memref2 VS1_3 -
3720 VS1_3: vx3 = memref3 - -
3721 S1: x = load - VS1_0
3722 S2: z = x + 1 - -
3723
3724 step2: vectorize stmt S2 (done here):
3725 To vectorize stmt S2 we first need to find the relevant vector
3726 def for the first operand 'x'. This is, as usual, obtained from
3727 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3728 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3729 relevant vector def 'vx0'. Having found 'vx0' we can generate
3730 the vector stmt VS2_0, and as usual, record it in the
3731 STMT_VINFO_VEC_STMT of stmt S2.
3732 When creating the second copy (VS2_1), we obtain the relevant vector
3733 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3734 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3735 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3736 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3737 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3738 chain of stmts and pointers:
3739 RELATED_STMT VEC_STMT
3740 VS1_0: vx0 = memref0 VS1_1 -
3741 VS1_1: vx1 = memref1 VS1_2 -
3742 VS1_2: vx2 = memref2 VS1_3 -
3743 VS1_3: vx3 = memref3 - -
3744 S1: x = load - VS1_0
3745 VS2_0: vz0 = vx0 + v1 VS2_1 -
3746 VS2_1: vz1 = vx1 + v1 VS2_2 -
3747 VS2_2: vz2 = vx2 + v1 VS2_3 -
3748 VS2_3: vz3 = vx3 + v1 - -
3749 S2: z = x + 1 - VS2_0 */
3750
3751 prev_stmt_info = NULL;
3752 for (j = 0; j < ncopies; j++)
3753 {
3754 /* Handle uses. */
3755 if (j == 0)
3756 {
3757 if (op_type == binary_op || op_type == ternary_op)
3758 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3759 slp_node, -1);
3760 else
3761 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3762 slp_node, -1);
3763 if (op_type == ternary_op)
3764 {
3765 vec_oprnds2.create (1);
3766 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3767 stmt,
3768 NULL));
3769 }
3770 }
3771 else
3772 {
3773 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3774 if (op_type == ternary_op)
3775 {
3776 tree vec_oprnd = vec_oprnds2.pop ();
3777 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3778 vec_oprnd));
3779 }
3780 }
3781
3782 /* Arguments are ready. Create the new vector stmt. */
3783 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3784 {
3785 vop1 = ((op_type == binary_op || op_type == ternary_op)
3786 ? vec_oprnds1[i] : NULL_TREE);
3787 vop2 = ((op_type == ternary_op)
3788 ? vec_oprnds2[i] : NULL_TREE);
3789 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3790 vop0, vop1, vop2);
3791 new_temp = make_ssa_name (vec_dest, new_stmt);
3792 gimple_assign_set_lhs (new_stmt, new_temp);
3793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3794 if (slp_node)
3795 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3796 }
3797
3798 if (slp_node)
3799 continue;
3800
3801 if (j == 0)
3802 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3803 else
3804 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3805 prev_stmt_info = vinfo_for_stmt (new_stmt);
3806 }
3807
3808 vec_oprnds0.release ();
3809 vec_oprnds1.release ();
3810 vec_oprnds2.release ();
3811
3812 return true;
3813 }
3814
3815 /* A helper function to ensure data reference DR's base alignment
3816 for STMT_INFO. */
3817
3818 static void
3819 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3820 {
3821 if (!dr->aux)
3822 return;
3823
3824 if (((dataref_aux *)dr->aux)->base_misaligned)
3825 {
3826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3827 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
3828
3829 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3830 DECL_USER_ALIGN (base_decl) = 1;
3831 ((dataref_aux *)dr->aux)->base_misaligned = false;
3832 }
3833 }
3834
3835
3836 /* Function vectorizable_store.
3837
3838 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3839 can be vectorized.
3840 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3841 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3842 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3843
3844 static bool
3845 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3846 slp_tree slp_node)
3847 {
3848 tree scalar_dest;
3849 tree data_ref;
3850 tree op;
3851 tree vec_oprnd = NULL_TREE;
3852 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3853 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3855 tree elem_type;
3856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3857 struct loop *loop = NULL;
3858 enum machine_mode vec_mode;
3859 tree dummy;
3860 enum dr_alignment_support alignment_support_scheme;
3861 tree def;
3862 gimple def_stmt;
3863 enum vect_def_type dt;
3864 stmt_vec_info prev_stmt_info = NULL;
3865 tree dataref_ptr = NULL_TREE;
3866 tree dataref_offset = NULL_TREE;
3867 gimple ptr_incr = NULL;
3868 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3869 int ncopies;
3870 int j;
3871 gimple next_stmt, first_stmt = NULL;
3872 bool grouped_store = false;
3873 bool store_lanes_p = false;
3874 unsigned int group_size, i;
3875 vec<tree> dr_chain = vNULL;
3876 vec<tree> oprnds = vNULL;
3877 vec<tree> result_chain = vNULL;
3878 bool inv_p;
3879 vec<tree> vec_oprnds = vNULL;
3880 bool slp = (slp_node != NULL);
3881 unsigned int vec_num;
3882 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3883 tree aggr_type;
3884
3885 if (loop_vinfo)
3886 loop = LOOP_VINFO_LOOP (loop_vinfo);
3887
3888 /* Multiple types in SLP are handled by creating the appropriate number of
3889 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3890 case of SLP. */
3891 if (slp || PURE_SLP_STMT (stmt_info))
3892 ncopies = 1;
3893 else
3894 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3895
3896 gcc_assert (ncopies >= 1);
3897
3898 /* FORNOW. This restriction should be relaxed. */
3899 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3900 {
3901 if (dump_enabled_p ())
3902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3903 "multiple types in nested loop.\n");
3904 return false;
3905 }
3906
3907 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3908 return false;
3909
3910 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3911 return false;
3912
3913 /* Is vectorizable store? */
3914
3915 if (!is_gimple_assign (stmt))
3916 return false;
3917
3918 scalar_dest = gimple_assign_lhs (stmt);
3919 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3920 && is_pattern_stmt_p (stmt_info))
3921 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3922 if (TREE_CODE (scalar_dest) != ARRAY_REF
3923 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
3924 && TREE_CODE (scalar_dest) != INDIRECT_REF
3925 && TREE_CODE (scalar_dest) != COMPONENT_REF
3926 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3927 && TREE_CODE (scalar_dest) != REALPART_EXPR
3928 && TREE_CODE (scalar_dest) != MEM_REF)
3929 return false;
3930
3931 gcc_assert (gimple_assign_single_p (stmt));
3932 op = gimple_assign_rhs1 (stmt);
3933 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3934 &def, &dt))
3935 {
3936 if (dump_enabled_p ())
3937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3938 "use not simple.\n");
3939 return false;
3940 }
3941
3942 elem_type = TREE_TYPE (vectype);
3943 vec_mode = TYPE_MODE (vectype);
3944
3945 /* FORNOW. In some cases can vectorize even if data-type not supported
3946 (e.g. - array initialization with 0). */
3947 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3948 return false;
3949
3950 if (!STMT_VINFO_DATA_REF (stmt_info))
3951 return false;
3952
3953 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3954 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3955 size_zero_node) < 0)
3956 {
3957 if (dump_enabled_p ())
3958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3959 "negative step for store.\n");
3960 return false;
3961 }
3962
3963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3964 {
3965 grouped_store = true;
3966 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3967 if (!slp && !PURE_SLP_STMT (stmt_info))
3968 {
3969 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3970 if (vect_store_lanes_supported (vectype, group_size))
3971 store_lanes_p = true;
3972 else if (!vect_grouped_store_supported (vectype, group_size))
3973 return false;
3974 }
3975
3976 if (first_stmt == stmt)
3977 {
3978 /* STMT is the leader of the group. Check the operands of all the
3979 stmts of the group. */
3980 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3981 while (next_stmt)
3982 {
3983 gcc_assert (gimple_assign_single_p (next_stmt));
3984 op = gimple_assign_rhs1 (next_stmt);
3985 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3986 &def_stmt, &def, &dt))
3987 {
3988 if (dump_enabled_p ())
3989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3990 "use not simple.\n");
3991 return false;
3992 }
3993 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3994 }
3995 }
3996 }
3997
3998 if (!vec_stmt) /* transformation not required. */
3999 {
4000 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4001 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
4002 NULL, NULL, NULL);
4003 return true;
4004 }
4005
4006 /** Transform. **/
4007
4008 ensure_base_align (stmt_info, dr);
4009
4010 if (grouped_store)
4011 {
4012 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4013 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4014
4015 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4016
4017 /* FORNOW */
4018 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
4019
4020 /* We vectorize all the stmts of the interleaving group when we
4021 reach the last stmt in the group. */
4022 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4023 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
4024 && !slp)
4025 {
4026 *vec_stmt = NULL;
4027 return true;
4028 }
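/* Worked example (illustration only, non-SLP case): for an interleaving
   group of 4 scalar stores, the calls for the first three members only
   bump GROUP_STORE_COUNT and take the early return just above, leaving
   *VEC_STMT NULL; the vector stores and the permutations feeding them
   are emitted only when the last member of the group is reached.  */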
4029
4030 if (slp)
4031 {
4032 grouped_store = false;
4033 /* VEC_NUM is the number of vect stmts to be created for this
4034 group. */
4035 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4036 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4037 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4038 op = gimple_assign_rhs1 (first_stmt);
4039 }
4040 else
4041 /* VEC_NUM is the number of vect stmts to be created for this
4042 group. */
4043 vec_num = group_size;
4044 }
4045 else
4046 {
4047 first_stmt = stmt;
4048 first_dr = dr;
4049 group_size = vec_num = 1;
4050 }
4051
4052 if (dump_enabled_p ())
4053 dump_printf_loc (MSG_NOTE, vect_location,
4054 "transform store. ncopies = %d\n", ncopies);
4055
4056 dr_chain.create (group_size);
4057 oprnds.create (group_size);
4058
4059 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4060 gcc_assert (alignment_support_scheme);
4061 /* Targets with store-lane instructions must not require explicit
4062 realignment. */
4063 gcc_assert (!store_lanes_p
4064 || alignment_support_scheme == dr_aligned
4065 || alignment_support_scheme == dr_unaligned_supported);
4066
4067 if (store_lanes_p)
4068 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4069 else
4070 aggr_type = vectype;
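/* Worked example (illustration only): for a group of two V4SI stores
   handled by store-lanes, vec_num * nunits == 8, so AGGR_TYPE is the
   array type int[8] used for the memory reference stored by the
   IFN_STORE_LANES call below; for ordinary vector stores AGGR_TYPE is
   just the vector type itself.  */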
4071
4072 /* In case the vectorization factor (VF) is bigger than the number
4073 of elements that we can fit in a vectype (nunits), we have to generate
4074 more than one vector stmt - i.e - we need to "unroll" the
4075 vector stmt by a factor VF/nunits. For more details see documentation in
4076 vect_get_vec_def_for_copy_stmt. */
4077
4078 /* In case of interleaving (non-unit grouped access):
4079
4080 S1: &base + 2 = x2
4081 S2: &base = x0
4082 S3: &base + 1 = x1
4083 S4: &base + 3 = x3
4084
4085 We create vectorized stores starting from the base address (the access of
4086 the first stmt in the chain, S2 in the above example) when the last store
4087 stmt of the chain (S4) is reached:
4088
4089 VS1: &base = vx2
4090 VS2: &base + vec_size*1 = vx0
4091 VS3: &base + vec_size*2 = vx1
4092 VS4: &base + vec_size*3 = vx3
4093
4094 Then permutation statements are generated:
4095
4096 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4097 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4098 ...
4099
4100 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4101 (the order of the data-refs in the output of vect_permute_store_chain
4102 corresponds to the order of scalar stmts in the interleaving chain - see
4103 the documentation of vect_permute_store_chain()).
4104
4105 In case of both multiple types and interleaving, the above vector stores and
4106 permutation stmts are created for every copy. The result vector stmts are
4107 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4108 STMT_VINFO_RELATED_STMT for the next copies.
4109 */
4110
4111 prev_stmt_info = NULL;
4112 for (j = 0; j < ncopies; j++)
4113 {
4114 gimple new_stmt;
4115
4116 if (j == 0)
4117 {
4118 if (slp)
4119 {
4120 /* Get vectorized arguments for SLP_NODE. */
4121 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4122 NULL, slp_node, -1);
4123
4124 vec_oprnd = vec_oprnds[0];
4125 }
4126 else
4127 {
4128 /* For interleaved stores we collect vectorized defs for all the
4129 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4130 used as an input to vect_permute_store_chain(), and OPRNDS as
4131 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4132
4133 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4134 OPRNDS are of size 1. */
4135 next_stmt = first_stmt;
4136 for (i = 0; i < group_size; i++)
4137 {
4138 /* Since gaps are not supported for interleaved stores,
4139 GROUP_SIZE is the exact number of stmts in the chain.
4140 Therefore, NEXT_STMT can't be NULL. In case that
4141 there is no interleaving, GROUP_SIZE is 1, and only one
4142 iteration of the loop will be executed. */
4143 gcc_assert (next_stmt
4144 && gimple_assign_single_p (next_stmt));
4145 op = gimple_assign_rhs1 (next_stmt);
4146
4147 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4148 NULL);
4149 dr_chain.quick_push (vec_oprnd);
4150 oprnds.quick_push (vec_oprnd);
4151 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4152 }
4153 }
4154
4155 /* We should have caught mismatched types earlier. */
4156 gcc_assert (useless_type_conversion_p (vectype,
4157 TREE_TYPE (vec_oprnd)));
4158 bool simd_lane_access_p
4159 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4160 if (simd_lane_access_p
4161 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4162 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4163 && integer_zerop (DR_OFFSET (first_dr))
4164 && integer_zerop (DR_INIT (first_dr))
4165 && alias_sets_conflict_p (get_alias_set (aggr_type),
4166 get_alias_set (DR_REF (first_dr))))
4167 {
4168 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4169 dataref_offset = build_int_cst (reference_alias_ptr_type
4170 (DR_REF (first_dr)), 0);
4171 inv_p = false;
4172 }
4173 else
4174 dataref_ptr
4175 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4176 simd_lane_access_p ? loop : NULL,
4177 NULL_TREE, &dummy, gsi, &ptr_incr,
4178 simd_lane_access_p, &inv_p);
4179 gcc_assert (bb_vinfo || !inv_p);
4180 }
4181 else
4182 {
4183 /* For interleaved stores we created vectorized defs for all the
4184 defs stored in OPRNDS in the previous iteration (previous copy).
4185 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4186 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4187 next copy.
4188 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4189 OPRNDS are of size 1. */
4190 for (i = 0; i < group_size; i++)
4191 {
4192 op = oprnds[i];
4193 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4194 &def, &dt);
4195 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4196 dr_chain[i] = vec_oprnd;
4197 oprnds[i] = vec_oprnd;
4198 }
4199 if (dataref_offset)
4200 dataref_offset
4201 = int_const_binop (PLUS_EXPR, dataref_offset,
4202 TYPE_SIZE_UNIT (aggr_type));
4203 else
4204 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4205 TYPE_SIZE_UNIT (aggr_type));
4206 }
4207
4208 if (store_lanes_p)
4209 {
4210 tree vec_array;
4211
4212 /* Combine all the vectors into an array. */
4213 vec_array = create_vector_array (vectype, vec_num);
4214 for (i = 0; i < vec_num; i++)
4215 {
4216 vec_oprnd = dr_chain[i];
4217 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4218 }
4219
4220 /* Emit:
4221 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4222 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4223 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4224 gimple_call_set_lhs (new_stmt, data_ref);
4225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4226 }
4227 else
4228 {
4229 new_stmt = NULL;
4230 if (grouped_store)
4231 {
4232 if (j == 0)
4233 result_chain.create (group_size);
4234 /* Permute. */
4235 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4236 &result_chain);
4237 }
4238
4239 next_stmt = first_stmt;
4240 for (i = 0; i < vec_num; i++)
4241 {
4242 unsigned align, misalign;
4243
4244 if (i > 0)
4245 /* Bump the vector pointer. */
4246 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4247 stmt, NULL_TREE);
4248
4249 if (slp)
4250 vec_oprnd = vec_oprnds[i];
4251 else if (grouped_store)
4252 /* For grouped stores vectorized defs are interleaved in
4253 vect_permute_store_chain(). */
4254 vec_oprnd = result_chain[i];
4255
4256 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4257 dataref_offset
4258 ? dataref_offset
4259 : build_int_cst (reference_alias_ptr_type
4260 (DR_REF (first_dr)), 0));
4261 align = TYPE_ALIGN_UNIT (vectype);
4262 if (aligned_access_p (first_dr))
4263 misalign = 0;
4264 else if (DR_MISALIGNMENT (first_dr) == -1)
4265 {
4266 TREE_TYPE (data_ref)
4267 = build_aligned_type (TREE_TYPE (data_ref),
4268 TYPE_ALIGN (elem_type));
4269 align = TYPE_ALIGN_UNIT (elem_type);
4270 misalign = 0;
4271 }
4272 else
4273 {
4274 TREE_TYPE (data_ref)
4275 = build_aligned_type (TREE_TYPE (data_ref),
4276 TYPE_ALIGN (elem_type));
4277 misalign = DR_MISALIGNMENT (first_dr);
4278 }
4279 if (dataref_offset == NULL_TREE)
4280 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4281 misalign);
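/* Worked example (illustration only, assuming a target where V4SF is
   16-byte aligned): dr_aligned gives align == 16 and misalign == 0; an
   unknown misalignment (DR_MISALIGNMENT == -1) drops the MEM_REF to the
   4-byte element alignment; a known misalignment of 8 also lowers the
   MEM_REF type alignment but records align == 16, misalign == 8 on the
   pointer.  */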
4282
4283 /* Arguments are ready. Create the new vector stmt. */
4284 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4286
4287 if (slp)
4288 continue;
4289
4290 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4291 if (!next_stmt)
4292 break;
4293 }
4294 }
4295 if (!slp)
4296 {
4297 if (j == 0)
4298 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4299 else
4300 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4301 prev_stmt_info = vinfo_for_stmt (new_stmt);
4302 }
4303 }
4304
4305 dr_chain.release ();
4306 oprnds.release ();
4307 result_chain.release ();
4308 vec_oprnds.release ();
4309
4310 return true;
4311 }
4312
4313 /* Given a vector type VECTYPE and permutation SEL returns
4314 the VECTOR_CST mask that implements the permutation of the
4315 vector elements. If that is impossible to do, returns NULL. */
4316
4317 tree
4318 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4319 {
4320 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4321 int i, nunits;
4322
4323 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4324
4325 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4326 return NULL;
4327
4328 mask_elt_type = lang_hooks.types.type_for_mode
4329 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4330 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4331
4332 mask_elts = XALLOCAVEC (tree, nunits);
4333 for (i = nunits - 1; i >= 0; i--)
4334 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4335 mask_vec = build_vector (mask_type, mask_elts);
4336
4337 return mask_vec;
4338 }
4339
4340 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4341 reversal of the vector elements. If that is impossible to do,
4342 returns NULL. */
4343
4344 static tree
4345 perm_mask_for_reverse (tree vectype)
4346 {
4347 int i, nunits;
4348 unsigned char *sel;
4349
4350 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4351 sel = XALLOCAVEC (unsigned char, nunits);
4352
4353 for (i = 0; i < nunits; ++i)
4354 sel[i] = nunits - 1 - i;
4355
4356 return vect_gen_perm_mask (vectype, sel);
4357 }
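/* Worked example (illustration only): for a V4SI vector the selector built
   above is { 3, 2, 1, 0 }, so the returned mask makes
   VEC_PERM_EXPR <x, x, { 3, 2, 1, 0 }> produce the elements of X in
   reverse order, provided can_vec_perm_p accepts the permutation for the
   vector mode.  */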
4358
4359 /* Given vector variables X and Y that were generated for the scalar
4360 STMT, generate instructions to permute the vector elements of X and Y
4361 using the permutation mask MASK_VEC, insert them at *GSI and return the
4362 permuted vector variable. */
4363
4364 static tree
4365 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4366 gimple_stmt_iterator *gsi)
4367 {
4368 tree vectype = TREE_TYPE (x);
4369 tree perm_dest, data_ref;
4370 gimple perm_stmt;
4371
4372 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4373 data_ref = make_ssa_name (perm_dest, NULL);
4374
4375 /* Generate the permute statement. */
4376 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4377 x, y, mask_vec);
4378 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4379
4380 return data_ref;
4381 }
4382
4383 /* vectorizable_load.
4384
4385 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4386 can be vectorized.
4387 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4388 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4389 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4390
4391 static bool
4392 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4393 slp_tree slp_node, slp_instance slp_node_instance)
4394 {
4395 tree scalar_dest;
4396 tree vec_dest = NULL;
4397 tree data_ref = NULL;
4398 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4399 stmt_vec_info prev_stmt_info;
4400 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4401 struct loop *loop = NULL;
4402 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4403 bool nested_in_vect_loop = false;
4404 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4405 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4406 tree elem_type;
4407 tree new_temp;
4408 enum machine_mode mode;
4409 gimple new_stmt = NULL;
4410 tree dummy;
4411 enum dr_alignment_support alignment_support_scheme;
4412 tree dataref_ptr = NULL_TREE;
4413 tree dataref_offset = NULL_TREE;
4414 gimple ptr_incr = NULL;
4415 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4416 int ncopies;
4417 int i, j, group_size, group_gap;
4418 tree msq = NULL_TREE, lsq;
4419 tree offset = NULL_TREE;
4420 tree realignment_token = NULL_TREE;
4421 gimple phi = NULL;
4422 vec<tree> dr_chain = vNULL;
4423 bool grouped_load = false;
4424 bool load_lanes_p = false;
4425 gimple first_stmt;
4426 bool inv_p;
4427 bool negative = false;
4428 bool compute_in_loop = false;
4429 struct loop *at_loop;
4430 int vec_num;
4431 bool slp = (slp_node != NULL);
4432 bool slp_perm = false;
4433 enum tree_code code;
4434 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4435 int vf;
4436 tree aggr_type;
4437 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4438 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4439 int gather_scale = 1;
4440 enum vect_def_type gather_dt = vect_unknown_def_type;
4441
4442 if (loop_vinfo)
4443 {
4444 loop = LOOP_VINFO_LOOP (loop_vinfo);
4445 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4446 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4447 }
4448 else
4449 vf = 1;
4450
4451 /* Multiple types in SLP are handled by creating the appropriate number of
4452 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4453 case of SLP. */
4454 if (slp || PURE_SLP_STMT (stmt_info))
4455 ncopies = 1;
4456 else
4457 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4458
4459 gcc_assert (ncopies >= 1);
4460
4461 /* FORNOW. This restriction should be relaxed. */
4462 if (nested_in_vect_loop && ncopies > 1)
4463 {
4464 if (dump_enabled_p ())
4465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4466 "multiple types in nested loop.\n");
4467 return false;
4468 }
4469
4470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4471 return false;
4472
4473 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4474 return false;
4475
4476 /* Is vectorizable load? */
4477 if (!is_gimple_assign (stmt))
4478 return false;
4479
4480 scalar_dest = gimple_assign_lhs (stmt);
4481 if (TREE_CODE (scalar_dest) != SSA_NAME)
4482 return false;
4483
4484 code = gimple_assign_rhs_code (stmt);
4485 if (code != ARRAY_REF
4486 && code != BIT_FIELD_REF
4487 && code != INDIRECT_REF
4488 && code != COMPONENT_REF
4489 && code != IMAGPART_EXPR
4490 && code != REALPART_EXPR
4491 && code != MEM_REF
4492 && TREE_CODE_CLASS (code) != tcc_declaration)
4493 return false;
4494
4495 if (!STMT_VINFO_DATA_REF (stmt_info))
4496 return false;
4497
4498 elem_type = TREE_TYPE (vectype);
4499 mode = TYPE_MODE (vectype);
4500
4501 /* FORNOW. In some cases can vectorize even if data-type not supported
4502 (e.g. - data copies). */
4503 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4504 {
4505 if (dump_enabled_p ())
4506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4507 "Aligned load, but unsupported type.\n");
4508 return false;
4509 }
4510
4511 /* Check if the load is a part of an interleaving chain. */
4512 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4513 {
4514 grouped_load = true;
4515 /* FORNOW */
4516 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4517
4518 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4519 if (!slp && !PURE_SLP_STMT (stmt_info))
4520 {
4521 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4522 if (vect_load_lanes_supported (vectype, group_size))
4523 load_lanes_p = true;
4524 else if (!vect_grouped_load_supported (vectype, group_size))
4525 return false;
4526 }
4527 }
4528
4529
4530 if (STMT_VINFO_GATHER_P (stmt_info))
4531 {
4532 gimple def_stmt;
4533 tree def;
4534 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4535 &gather_off, &gather_scale);
4536 gcc_assert (gather_decl);
4537 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4538 &def_stmt, &def, &gather_dt,
4539 &gather_off_vectype))
4540 {
4541 if (dump_enabled_p ())
4542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4543 "gather index use not simple.\n");
4544 return false;
4545 }
4546 }
4547 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4548 ;
4549 else
4550 {
4551 negative = tree_int_cst_compare (nested_in_vect_loop
4552 ? STMT_VINFO_DR_STEP (stmt_info)
4553 : DR_STEP (dr),
4554 size_zero_node) < 0;
4555 if (negative && ncopies > 1)
4556 {
4557 if (dump_enabled_p ())
4558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4559 "multiple types with negative step.\n");
4560 return false;
4561 }
4562
4563 if (negative)
4564 {
4565 if (grouped_load)
4566 {
4567 if (dump_enabled_p ())
4568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4569 "negative step for group load not supported"
4570 "\n");
4571 return false;
4572 }
4573 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4574 if (alignment_support_scheme != dr_aligned
4575 && alignment_support_scheme != dr_unaligned_supported)
4576 {
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4579 "negative step but alignment required.\n");
4580 return false;
4581 }
4582 if (!perm_mask_for_reverse (vectype))
4583 {
4584 if (dump_enabled_p ())
4585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4586 "negative step and reversing not supported."
4587 "\n");
4588 return false;
4589 }
4590 }
4591 }
4592
4593 if (!vec_stmt) /* transformation not required. */
4594 {
4595 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4596 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4597 return true;
4598 }
4599
4600 if (dump_enabled_p ())
4601 dump_printf_loc (MSG_NOTE, vect_location,
4602 "transform load. ncopies = %d\n", ncopies);
4603
4604 /** Transform. **/
4605
4606 ensure_base_align (stmt_info, dr);
4607
4608 if (STMT_VINFO_GATHER_P (stmt_info))
4609 {
4610 tree vec_oprnd0 = NULL_TREE, op;
4611 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4612 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4613 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4614 edge pe = loop_preheader_edge (loop);
4615 gimple_seq seq;
4616 basic_block new_bb;
4617 enum { NARROW, NONE, WIDEN } modifier;
4618 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4619
4620 if (nunits == gather_off_nunits)
4621 modifier = NONE;
4622 else if (nunits == gather_off_nunits / 2)
4623 {
4624 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4625 modifier = WIDEN;
4626
4627 for (i = 0; i < gather_off_nunits; ++i)
4628 sel[i] = i | nunits;
4629
4630 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4631 gcc_assert (perm_mask != NULL_TREE);
4632 }
4633 else if (nunits == gather_off_nunits * 2)
4634 {
4635 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4636 modifier = NARROW;
4637
4638 for (i = 0; i < nunits; ++i)
4639 sel[i] = i < gather_off_nunits
4640 ? i : i + nunits - gather_off_nunits;
4641
4642 perm_mask = vect_gen_perm_mask (vectype, sel);
4643 gcc_assert (perm_mask != NULL_TREE);
4644 ncopies *= 2;
4645 }
4646 else
4647 gcc_unreachable ();
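/* Worked examples of the selectors above (illustration only):
   with nunits == 2 and gather_off_nunits == 4 (WIDEN),
   sel == { 2, 3, 2, 3 }, so the odd-numbered copies gather using the
   high half of the offset vector; with nunits == 8 and
   gather_off_nunits == 4 (NARROW), sel == { 0, 1, 2, 3, 8, 9, 10, 11 },
   which concatenates the low halves of two consecutive gather results
   into one full data vector.  */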
4648
4649 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4650 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4651 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4652 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4653 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4654 scaletype = TREE_VALUE (arglist);
4655 gcc_checking_assert (types_compatible_p (srctype, rettype)
4656 && types_compatible_p (srctype, masktype));
4657
4658 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4659
4660 ptr = fold_convert (ptrtype, gather_base);
4661 if (!is_gimple_min_invariant (ptr))
4662 {
4663 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4664 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4665 gcc_assert (!new_bb);
4666 }
4667
4668 /* Currently we support only unconditional gather loads,
4669 so mask should be all ones. */
4670 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4671 mask = build_int_cst (TREE_TYPE (masktype), -1);
4672 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4673 {
4674 REAL_VALUE_TYPE r;
4675 long tmp[6];
4676 for (j = 0; j < 6; ++j)
4677 tmp[j] = -1;
4678 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4679 mask = build_real (TREE_TYPE (masktype), r);
4680 }
4681 else
4682 gcc_unreachable ();
4683 mask = build_vector_from_val (masktype, mask);
4684 mask = vect_init_vector (stmt, mask, masktype, NULL);
4685
4686 scale = build_int_cst (scaletype, gather_scale);
4687
4688 prev_stmt_info = NULL;
4689 for (j = 0; j < ncopies; ++j)
4690 {
4691 if (modifier == WIDEN && (j & 1))
4692 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4693 perm_mask, stmt, gsi);
4694 else if (j == 0)
4695 op = vec_oprnd0
4696 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4697 else
4698 op = vec_oprnd0
4699 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4700
4701 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4702 {
4703 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4704 == TYPE_VECTOR_SUBPARTS (idxtype));
4705 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4706 var = make_ssa_name (var, NULL);
4707 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4708 new_stmt
4709 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4710 op, NULL_TREE);
4711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4712 op = var;
4713 }
4714
4715 new_stmt
4716 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4717
4718 if (!useless_type_conversion_p (vectype, rettype))
4719 {
4720 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4721 == TYPE_VECTOR_SUBPARTS (rettype));
4722 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4723 op = make_ssa_name (var, new_stmt);
4724 gimple_call_set_lhs (new_stmt, op);
4725 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4726 var = make_ssa_name (vec_dest, NULL);
4727 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4728 new_stmt
4729 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4730 NULL_TREE);
4731 }
4732 else
4733 {
4734 var = make_ssa_name (vec_dest, new_stmt);
4735 gimple_call_set_lhs (new_stmt, var);
4736 }
4737
4738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4739
4740 if (modifier == NARROW)
4741 {
4742 if ((j & 1) == 0)
4743 {
4744 prev_res = var;
4745 continue;
4746 }
4747 var = permute_vec_elements (prev_res, var,
4748 perm_mask, stmt, gsi);
4749 new_stmt = SSA_NAME_DEF_STMT (var);
4750 }
4751
4752 if (prev_stmt_info == NULL)
4753 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4754 else
4755 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4756 prev_stmt_info = vinfo_for_stmt (new_stmt);
4757 }
4758 return true;
4759 }
4760 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4761 {
4762 gimple_stmt_iterator incr_gsi;
4763 bool insert_after;
4764 gimple incr;
4765 tree offvar;
4766 tree ivstep;
4767 tree running_off;
4768 vec<constructor_elt, va_gc> *v = NULL;
4769 gimple_seq stmts = NULL;
4770 tree stride_base, stride_step, alias_off;
4771
4772 gcc_assert (!nested_in_vect_loop);
4773
4774 stride_base
4775 = fold_build_pointer_plus
4776 (unshare_expr (DR_BASE_ADDRESS (dr)),
4777 size_binop (PLUS_EXPR,
4778 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4779 convert_to_ptrofftype (DR_INIT (dr))));
4780 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4781
4782 /* For a load with a loop-invariant stride other than a power of 2
4783 (i.e. not a grouped access) like so:
4784
4785 for (i = 0; i < n; i += stride)
4786 ... = array[i];
4787
4788 we generate a new induction variable and new accesses to
4789 form a new vector (or vectors, depending on ncopies):
4790
4791 for (j = 0; ; j += VF*stride)
4792 tmp1 = array[j];
4793 tmp2 = array[j + stride];
4794 ...
4795 vectemp = {tmp1, tmp2, ...}
4796 */
4797
4798 ivstep = stride_step;
4799 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4800 build_int_cst (TREE_TYPE (ivstep), vf));
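/* Worked example (illustration only, non-SLP): with vf == 4 and a byte
   stride of S, the IV created below advances OFFVAR by 4*S per vectorized
   iteration, while the scalar loads inside each copy step RUNNING_OFF by
   S at a time; since nunits * ncopies == vf the two walks cover the same
   distance.  */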
4801
4802 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4803
4804 create_iv (stride_base, ivstep, NULL,
4805 loop, &incr_gsi, insert_after,
4806 &offvar, NULL);
4807 incr = gsi_stmt (incr_gsi);
4808 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4809
4810 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4811 if (stmts)
4812 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4813
4814 prev_stmt_info = NULL;
4815 running_off = offvar;
4816 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4817 for (j = 0; j < ncopies; j++)
4818 {
4819 tree vec_inv;
4820
4821 vec_alloc (v, nunits);
4822 for (i = 0; i < nunits; i++)
4823 {
4824 tree newref, newoff;
4825 gimple incr;
4826 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4827 running_off, alias_off);
4828
4829 newref = force_gimple_operand_gsi (gsi, newref, true,
4830 NULL_TREE, true,
4831 GSI_SAME_STMT);
4832 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4833 newoff = copy_ssa_name (running_off, NULL);
4834 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4835 running_off, stride_step);
4836 vect_finish_stmt_generation (stmt, incr, gsi);
4837
4838 running_off = newoff;
4839 }
4840
4841 vec_inv = build_constructor (vectype, v);
4842 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4843 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4844
4845 if (j == 0)
4846 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4847 else
4848 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4849 prev_stmt_info = vinfo_for_stmt (new_stmt);
4850 }
4851 return true;
4852 }
4853
4854 if (grouped_load)
4855 {
4856 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4857 if (slp
4858 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4859 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4860 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4861
4862 /* Check if the chain of loads is already vectorized. */
4863 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4864 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4865 ??? But we can only do so if there is exactly one
4866 as we have no way to get at the rest. Leave the CSE
4867 opportunity alone.
4868 ??? With the group load eventually participating
4869 in multiple different permutations (having multiple
4870 slp nodes which refer to the same group) the CSE
4871 is even wrong code. See PR56270. */
4872 && !slp)
4873 {
4874 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4875 return true;
4876 }
4877 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4878 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4879
4880 /* VEC_NUM is the number of vect stmts to be created for this group. */
4881 if (slp)
4882 {
4883 grouped_load = false;
4884 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4885 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4886 slp_perm = true;
4887 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
4888 }
4889 else
4890 {
4891 vec_num = group_size;
4892 group_gap = 0;
4893 }
4894 }
4895 else
4896 {
4897 first_stmt = stmt;
4898 first_dr = dr;
4899 group_size = vec_num = 1;
4900 group_gap = 0;
4901 }
4902
4903 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4904 gcc_assert (alignment_support_scheme);
4905 /* Targets with load-lane instructions must not require explicit
4906 realignment. */
4907 gcc_assert (!load_lanes_p
4908 || alignment_support_scheme == dr_aligned
4909 || alignment_support_scheme == dr_unaligned_supported);
4910
4911 /* In case the vectorization factor (VF) is bigger than the number
4912 of elements that we can fit in a vectype (nunits), we have to generate
4913 more than one vector stmt - i.e - we need to "unroll" the
4914 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4915 from one copy of the vector stmt to the next, in the field
4916 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4917 stages to find the correct vector defs to be used when vectorizing
4918 stmts that use the defs of the current stmt. The example below
4919 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4920 need to create 4 vectorized stmts):
4921
4922 before vectorization:
4923 RELATED_STMT VEC_STMT
4924 S1: x = memref - -
4925 S2: z = x + 1 - -
4926
4927 step 1: vectorize stmt S1:
4928 We first create the vector stmt VS1_0, and, as usual, record a
4929 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4930 Next, we create the vector stmt VS1_1, and record a pointer to
4931 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4932 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4933 stmts and pointers:
4934 RELATED_STMT VEC_STMT
4935 VS1_0: vx0 = memref0 VS1_1 -
4936 VS1_1: vx1 = memref1 VS1_2 -
4937 VS1_2: vx2 = memref2 VS1_3 -
4938 VS1_3: vx3 = memref3 - -
4939 S1: x = load - VS1_0
4940 S2: z = x + 1 - -
4941
4942 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4943 information we recorded in RELATED_STMT field is used to vectorize
4944 stmt S2. */
4945
4946 /* In case of interleaving (non-unit grouped access):
4947
4948 S1: x2 = &base + 2
4949 S2: x0 = &base
4950 S3: x1 = &base + 1
4951 S4: x3 = &base + 3
4952
4953 Vectorized loads are created in the order of memory accesses
4954 starting from the access of the first stmt of the chain:
4955
4956 VS1: vx0 = &base
4957 VS2: vx1 = &base + vec_size*1
4958 VS3: vx3 = &base + vec_size*2
4959 VS4: vx4 = &base + vec_size*3
4960
4961 Then permutation statements are generated:
4962
4963 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4964 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4965 ...
4966
4967 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4968 (the order of the data-refs in the output of vect_permute_load_chain
4969 corresponds to the order of scalar stmts in the interleaving chain - see
4970 the documentation of vect_permute_load_chain()).
4971 The generation of permutation stmts and recording them in
4972 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4973
4974 In case of both multiple types and interleaving, the vector loads and
4975 permutation stmts above are created for every copy. The result vector
4976 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4977 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4978
4979 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4980 on a target that supports unaligned accesses (dr_unaligned_supported)
4981 we generate the following code:
4982 p = initial_addr;
4983 indx = 0;
4984 loop {
4985 p = p + indx * vectype_size;
4986 vec_dest = *(p);
4987 indx = indx + 1;
4988 }
4989
4990 Otherwise, the data reference is potentially unaligned on a target that
4991 does not support unaligned accesses (dr_explicit_realign_optimized) -
4992 then generate the following code, in which the data in each iteration is
4993 obtained by two vector loads, one from the previous iteration, and one
4994 from the current iteration:
4995 p1 = initial_addr;
4996 msq_init = *(floor(p1))
4997 p2 = initial_addr + VS - 1;
4998 realignment_token = call target_builtin;
4999 indx = 0;
5000 loop {
5001 p2 = p2 + indx * vectype_size
5002 lsq = *(floor(p2))
5003 vec_dest = realign_load (msq, lsq, realignment_token)
5004 indx = indx + 1;
5005 msq = lsq;
5006 } */
5007
5008 /* If the misalignment remains the same throughout the execution of the
5009 loop, we can create the init_addr and permutation mask at the loop
5010 preheader. Otherwise, it needs to be created inside the loop.
5011 This can only occur when vectorizing memory accesses in the inner-loop
5012 nested within an outer-loop that is being vectorized. */
5013
5014 if (nested_in_vect_loop
5015 && (TREE_INT_CST_LOW (DR_STEP (dr))
5016 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5017 {
5018 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5019 compute_in_loop = true;
5020 }
5021
5022 if ((alignment_support_scheme == dr_explicit_realign_optimized
5023 || alignment_support_scheme == dr_explicit_realign)
5024 && !compute_in_loop)
5025 {
5026 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5027 alignment_support_scheme, NULL_TREE,
5028 &at_loop);
5029 if (alignment_support_scheme == dr_explicit_realign_optimized)
5030 {
5031 phi = SSA_NAME_DEF_STMT (msq);
5032 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5033 }
5034 }
5035 else
5036 at_loop = loop;
5037
5038 if (negative)
5039 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5040
5041 if (load_lanes_p)
5042 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5043 else
5044 aggr_type = vectype;
5045
5046 prev_stmt_info = NULL;
5047 for (j = 0; j < ncopies; j++)
5048 {
5049 /* 1. Create the vector or array pointer update chain. */
5050 if (j == 0)
5051 {
5052 bool simd_lane_access_p
5053 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5054 if (simd_lane_access_p
5055 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5056 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5057 && integer_zerop (DR_OFFSET (first_dr))
5058 && integer_zerop (DR_INIT (first_dr))
5059 && alias_sets_conflict_p (get_alias_set (aggr_type),
5060 get_alias_set (DR_REF (first_dr)))
5061 && (alignment_support_scheme == dr_aligned
5062 || alignment_support_scheme == dr_unaligned_supported))
5063 {
5064 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5065 dataref_offset = build_int_cst (reference_alias_ptr_type
5066 (DR_REF (first_dr)), 0);
5067 inv_p = false;
5068 }
5069 else
5070 dataref_ptr
5071 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5072 offset, &dummy, gsi, &ptr_incr,
5073 simd_lane_access_p, &inv_p);
5074 }
5075 else if (dataref_offset)
5076 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5077 TYPE_SIZE_UNIT (aggr_type));
5078 else
5079 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5080 TYPE_SIZE_UNIT (aggr_type));
5081
5082 if (grouped_load || slp_perm)
5083 dr_chain.create (vec_num);
5084
5085 if (load_lanes_p)
5086 {
5087 tree vec_array;
5088
5089 vec_array = create_vector_array (vectype, vec_num);
5090
5091 /* Emit:
5092 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5093 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5094 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5095 gimple_call_set_lhs (new_stmt, vec_array);
5096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5097
5098 /* Extract each vector into an SSA_NAME. */
5099 for (i = 0; i < vec_num; i++)
5100 {
5101 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5102 vec_array, i);
5103 dr_chain.quick_push (new_temp);
5104 }
5105
5106 /* Record the mapping between SSA_NAMEs and statements. */
5107 vect_record_grouped_load_vectors (stmt, dr_chain);
5108 }
5109 else
5110 {
5111 for (i = 0; i < vec_num; i++)
5112 {
5113 if (i > 0)
5114 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5115 stmt, NULL_TREE);
5116
5117 /* 2. Create the vector-load in the loop. */
5118 switch (alignment_support_scheme)
5119 {
5120 case dr_aligned:
5121 case dr_unaligned_supported:
5122 {
5123 unsigned int align, misalign;
5124
5125 data_ref
5126 = build2 (MEM_REF, vectype, dataref_ptr,
5127 dataref_offset
5128 ? dataref_offset
5129 : build_int_cst (reference_alias_ptr_type
5130 (DR_REF (first_dr)), 0));
5131 align = TYPE_ALIGN_UNIT (vectype);
5132 if (alignment_support_scheme == dr_aligned)
5133 {
5134 gcc_assert (aligned_access_p (first_dr));
5135 misalign = 0;
5136 }
5137 else if (DR_MISALIGNMENT (first_dr) == -1)
5138 {
5139 TREE_TYPE (data_ref)
5140 = build_aligned_type (TREE_TYPE (data_ref),
5141 TYPE_ALIGN (elem_type));
5142 align = TYPE_ALIGN_UNIT (elem_type);
5143 misalign = 0;
5144 }
5145 else
5146 {
5147 TREE_TYPE (data_ref)
5148 = build_aligned_type (TREE_TYPE (data_ref),
5149 TYPE_ALIGN (elem_type));
5150 misalign = DR_MISALIGNMENT (first_dr);
5151 }
5152 if (dataref_offset == NULL_TREE)
5153 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5154 align, misalign);
5155 break;
5156 }
5157 case dr_explicit_realign:
5158 {
5159 tree ptr, bump;
5160 tree vs_minus_1;
5161
5162 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5163
5164 if (compute_in_loop)
5165 msq = vect_setup_realignment (first_stmt, gsi,
5166 &realignment_token,
5167 dr_explicit_realign,
5168 dataref_ptr, NULL);
5169
5170 ptr = copy_ssa_name (dataref_ptr, NULL);
5171 new_stmt = gimple_build_assign_with_ops
5172 (BIT_AND_EXPR, ptr, dataref_ptr,
5173 build_int_cst
5174 (TREE_TYPE (dataref_ptr),
5175 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5177 data_ref
5178 = build2 (MEM_REF, vectype, ptr,
5179 build_int_cst (reference_alias_ptr_type
5180 (DR_REF (first_dr)), 0));
5181 vec_dest = vect_create_destination_var (scalar_dest,
5182 vectype);
5183 new_stmt = gimple_build_assign (vec_dest, data_ref);
5184 new_temp = make_ssa_name (vec_dest, new_stmt);
5185 gimple_assign_set_lhs (new_stmt, new_temp);
5186 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5187 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5189 msq = new_temp;
5190
5191 bump = size_binop (MULT_EXPR, vs_minus_1,
5192 TYPE_SIZE_UNIT (elem_type));
5193 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5194 new_stmt = gimple_build_assign_with_ops
5195 (BIT_AND_EXPR, NULL_TREE, ptr,
5196 build_int_cst
5197 (TREE_TYPE (ptr),
5198 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5199 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5200 gimple_assign_set_lhs (new_stmt, ptr);
5201 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5202 data_ref
5203 = build2 (MEM_REF, vectype, ptr,
5204 build_int_cst (reference_alias_ptr_type
5205 (DR_REF (first_dr)), 0));
5206 break;
5207 }
5208 case dr_explicit_realign_optimized:
5209 new_temp = copy_ssa_name (dataref_ptr, NULL);
5210 new_stmt = gimple_build_assign_with_ops
5211 (BIT_AND_EXPR, new_temp, dataref_ptr,
5212 build_int_cst
5213 (TREE_TYPE (dataref_ptr),
5214 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5216 data_ref
5217 = build2 (MEM_REF, vectype, new_temp,
5218 build_int_cst (reference_alias_ptr_type
5219 (DR_REF (first_dr)), 0));
5220 break;
5221 default:
5222 gcc_unreachable ();
5223 }
5224 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5225 new_stmt = gimple_build_assign (vec_dest, data_ref);
5226 new_temp = make_ssa_name (vec_dest, new_stmt);
5227 gimple_assign_set_lhs (new_stmt, new_temp);
5228 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5229
5230 /* 3. Handle explicit realignment if necessary/supported.
5231 Create in loop:
5232 vec_dest = realign_load (msq, lsq, realignment_token) */
5233 if (alignment_support_scheme == dr_explicit_realign_optimized
5234 || alignment_support_scheme == dr_explicit_realign)
5235 {
5236 lsq = gimple_assign_lhs (new_stmt);
5237 if (!realignment_token)
5238 realignment_token = dataref_ptr;
5239 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5240 new_stmt
5241 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5242 vec_dest, msq, lsq,
5243 realignment_token);
5244 new_temp = make_ssa_name (vec_dest, new_stmt);
5245 gimple_assign_set_lhs (new_stmt, new_temp);
5246 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5247
5248 if (alignment_support_scheme == dr_explicit_realign_optimized)
5249 {
5250 gcc_assert (phi);
5251 if (i == vec_num - 1 && j == ncopies - 1)
5252 add_phi_arg (phi, lsq,
5253 loop_latch_edge (containing_loop),
5254 UNKNOWN_LOCATION);
5255 msq = lsq;
5256 }
5257 }
5258
5259 /* 4. Handle invariant-load. */
5260 if (inv_p && !bb_vinfo)
5261 {
5262 gimple_stmt_iterator gsi2 = *gsi;
5263 gcc_assert (!grouped_load);
5264 gsi_next (&gsi2);
5265 new_temp = vect_init_vector (stmt, scalar_dest,
5266 vectype, &gsi2);
5267 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5268 }
5269
5270 if (negative)
5271 {
5272 tree perm_mask = perm_mask_for_reverse (vectype);
5273 new_temp = permute_vec_elements (new_temp, new_temp,
5274 perm_mask, stmt, gsi);
5275 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5276 }
5277
5278 /* Collect vector loads and later create their permutation in
5279 vect_transform_grouped_load (). */
5280 if (grouped_load || slp_perm)
5281 dr_chain.quick_push (new_temp);
5282
5283 /* Store vector loads in the corresponding SLP_NODE. */
5284 if (slp && !slp_perm)
5285 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5286 }
5287 /* Bump the vector pointer to account for a gap. */
5288 if (slp && group_gap != 0)
5289 {
5290 tree bump = size_binop (MULT_EXPR,
5291 TYPE_SIZE_UNIT (elem_type),
5292 size_int (group_gap));
5293 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5294 stmt, bump);
5295 }
5296 }
5297
5298 if (slp && !slp_perm)
5299 continue;
5300
5301 if (slp_perm)
5302 {
5303 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5304 slp_node_instance, false))
5305 {
5306 dr_chain.release ();
5307 return false;
5308 }
5309 }
5310 else
5311 {
5312 if (grouped_load)
5313 {
5314 if (!load_lanes_p)
5315 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5316 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5317 }
5318 else
5319 {
5320 if (j == 0)
5321 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5322 else
5323 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5324 prev_stmt_info = vinfo_for_stmt (new_stmt);
5325 }
5326 }
5327 dr_chain.release ();
5328 }
5329
5330 return true;
5331 }
5332
5333 /* Function vect_is_simple_cond.
5334
5335 Input:
5336 LOOP - the loop that is being vectorized.
5337 COND - Condition that is checked for simple use.
5338
5339 Output:
5340 *COMP_VECTYPE - the vector type for the comparison.
5341
5342 Returns whether a COND can be vectorized. Checks whether
5343 condition operands are supportable using vect_is_simple_use. */
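/* For illustration (example only): for the condition 'x_5 > 3', where
   x_5 is an SSA name defined inside the loop and 3 is an INTEGER_CST,
   only the LHS goes through vect_is_simple_use_1, and *COMP_VECTYPE
   becomes the vector type recorded for x_5; if both operands are
   constants, no vector type is found and the caller rejects the
   COND_EXPR through its !comp_vectype check.  */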
5344
5345 static bool
5346 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5347 bb_vec_info bb_vinfo, tree *comp_vectype)
5348 {
5349 tree lhs, rhs;
5350 tree def;
5351 enum vect_def_type dt;
5352 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5353
5354 if (!COMPARISON_CLASS_P (cond))
5355 return false;
5356
5357 lhs = TREE_OPERAND (cond, 0);
5358 rhs = TREE_OPERAND (cond, 1);
5359
5360 if (TREE_CODE (lhs) == SSA_NAME)
5361 {
5362 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5363 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5364 &lhs_def_stmt, &def, &dt, &vectype1))
5365 return false;
5366 }
5367 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5368 && TREE_CODE (lhs) != FIXED_CST)
5369 return false;
5370
5371 if (TREE_CODE (rhs) == SSA_NAME)
5372 {
5373 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5374 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5375 &rhs_def_stmt, &def, &dt, &vectype2))
5376 return false;
5377 }
5378 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5379 && TREE_CODE (rhs) != FIXED_CST)
5380 return false;
5381
5382 *comp_vectype = vectype1 ? vectype1 : vectype2;
5383 return true;
5384 }
5385
5386 /* vectorizable_condition.
5387
5388 Check if STMT is conditional modify expression that can be vectorized.
5389 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5390 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5391 at GSI.
5392
5393 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5394    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and
5395    in the else clause if it is 2).
5396
5397 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
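/* Illustrative sketch, not from the original sources: with a
   vectorization factor of 4, a scalar statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a single vector statement of the form

       vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   where va_2, vb_3, vc_4 and vd_5 are the vector defs of the
   corresponding scalar operands.  */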
5398
5399 bool
5400 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5401 gimple *vec_stmt, tree reduc_def, int reduc_index,
5402 slp_tree slp_node)
5403 {
5404 tree scalar_dest = NULL_TREE;
5405 tree vec_dest = NULL_TREE;
5406 tree cond_expr, then_clause, else_clause;
5407 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5408 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5409 tree comp_vectype = NULL_TREE;
5410 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5411 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5412 tree vec_compare, vec_cond_expr;
5413 tree new_temp;
5414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5415 tree def;
5416 enum vect_def_type dt, dts[4];
5417 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5418 int ncopies;
5419 enum tree_code code;
5420 stmt_vec_info prev_stmt_info = NULL;
5421 int i, j;
5422 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5423 vec<tree> vec_oprnds0 = vNULL;
5424 vec<tree> vec_oprnds1 = vNULL;
5425 vec<tree> vec_oprnds2 = vNULL;
5426 vec<tree> vec_oprnds3 = vNULL;
5427 tree vec_cmp_type;
5428
5429 if (slp_node || PURE_SLP_STMT (stmt_info))
5430 ncopies = 1;
5431 else
5432 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5433
5434 gcc_assert (ncopies >= 1);
5435 if (reduc_index && ncopies > 1)
5436 return false; /* FORNOW */
5437
5438 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5439 return false;
5440
5441 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5442 return false;
5443
5444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5445 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5446 && reduc_def))
5447 return false;
5448
5449 /* FORNOW: not yet supported. */
5450 if (STMT_VINFO_LIVE_P (stmt_info))
5451 {
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5454 "value used after loop.\n");
5455 return false;
5456 }
5457
5458   /* Is this a vectorizable conditional operation?  */
5459 if (!is_gimple_assign (stmt))
5460 return false;
5461
5462 code = gimple_assign_rhs_code (stmt);
5463
5464 if (code != COND_EXPR)
5465 return false;
5466
5467 cond_expr = gimple_assign_rhs1 (stmt);
5468 then_clause = gimple_assign_rhs2 (stmt);
5469 else_clause = gimple_assign_rhs3 (stmt);
5470
5471 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5472 &comp_vectype)
5473 || !comp_vectype)
5474 return false;
5475
5476 if (TREE_CODE (then_clause) == SSA_NAME)
5477 {
5478 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5479 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5480 &then_def_stmt, &def, &dt))
5481 return false;
5482 }
5483 else if (TREE_CODE (then_clause) != INTEGER_CST
5484 && TREE_CODE (then_clause) != REAL_CST
5485 && TREE_CODE (then_clause) != FIXED_CST)
5486 return false;
5487
5488 if (TREE_CODE (else_clause) == SSA_NAME)
5489 {
5490 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5491 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5492 &else_def_stmt, &def, &dt))
5493 return false;
5494 }
5495 else if (TREE_CODE (else_clause) != INTEGER_CST
5496 && TREE_CODE (else_clause) != REAL_CST
5497 && TREE_CODE (else_clause) != FIXED_CST)
5498 return false;
5499
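  /* Illustrative example, not from the original sources: if VECTYPE is
     a 4 x float vector, PREC below is 32, CMP_TYPE is a 32-bit signed
     integer type and VEC_CMP_TYPE is the same-sized 4 x int vector
     type, which is what a vector comparison produces.  */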
5500 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5501   /* The result of a vector comparison should be of signed integer type.  */
5502 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5503 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5504 if (vec_cmp_type == NULL_TREE)
5505 return false;
5506
5507 if (!vec_stmt)
5508 {
5509 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5510 return expand_vec_cond_expr_p (vectype, comp_vectype);
5511 }
5512
5513 /* Transform. */
5514
5515 if (!slp_node)
5516 {
5517 vec_oprnds0.create (1);
5518 vec_oprnds1.create (1);
5519 vec_oprnds2.create (1);
5520 vec_oprnds3.create (1);
5521 }
5522
5523 /* Handle def. */
5524 scalar_dest = gimple_assign_lhs (stmt);
5525 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5526
5527 /* Handle cond expr. */
5528 for (j = 0; j < ncopies; j++)
5529 {
5530 gimple new_stmt = NULL;
5531 if (j == 0)
5532 {
5533 if (slp_node)
5534 {
5535 stack_vec<tree, 4> ops;
5536 stack_vec<vec<tree>, 4> vec_defs;
5537
5538 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5539 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5540 ops.safe_push (then_clause);
5541 ops.safe_push (else_clause);
5542 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5543 vec_oprnds3 = vec_defs.pop ();
5544 vec_oprnds2 = vec_defs.pop ();
5545 vec_oprnds1 = vec_defs.pop ();
5546 vec_oprnds0 = vec_defs.pop ();
5547
5548 ops.release ();
5549 vec_defs.release ();
5550 }
5551 else
5552 {
5553 gimple gtemp;
5554 vec_cond_lhs =
5555 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5556 stmt, NULL);
5557 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5558 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5559
5560 vec_cond_rhs =
5561 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5562 stmt, NULL);
5563 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5564 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5565 if (reduc_index == 1)
5566 vec_then_clause = reduc_def;
5567 else
5568 {
5569 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5570 stmt, NULL);
5571 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5572 NULL, &gtemp, &def, &dts[2]);
5573 }
5574 if (reduc_index == 2)
5575 vec_else_clause = reduc_def;
5576 else
5577 {
5578 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5579 stmt, NULL);
5580 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5581 NULL, &gtemp, &def, &dts[3]);
5582 }
5583 }
5584 }
5585 else
5586 {
5587 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5588 vec_oprnds0.pop ());
5589 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5590 vec_oprnds1.pop ());
5591 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5592 vec_oprnds2.pop ());
5593 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5594 vec_oprnds3.pop ());
5595 }
5596
5597 if (!slp_node)
5598 {
5599 vec_oprnds0.quick_push (vec_cond_lhs);
5600 vec_oprnds1.quick_push (vec_cond_rhs);
5601 vec_oprnds2.quick_push (vec_then_clause);
5602 vec_oprnds3.quick_push (vec_else_clause);
5603 }
5604
5605 /* Arguments are ready. Create the new vector stmt. */
5606 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5607 {
5608 vec_cond_rhs = vec_oprnds1[i];
5609 vec_then_clause = vec_oprnds2[i];
5610 vec_else_clause = vec_oprnds3[i];
5611
5612 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5613 vec_cond_lhs, vec_cond_rhs);
5614 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5615 vec_compare, vec_then_clause, vec_else_clause);
5616
5617 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5618 new_temp = make_ssa_name (vec_dest, new_stmt);
5619 gimple_assign_set_lhs (new_stmt, new_temp);
5620 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5621 if (slp_node)
5622 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5623 }
5624
5625 if (slp_node)
5626 continue;
5627
5628 if (j == 0)
5629 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5630 else
5631 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5632
5633 prev_stmt_info = vinfo_for_stmt (new_stmt);
5634 }
5635
5636 vec_oprnds0.release ();
5637 vec_oprnds1.release ();
5638 vec_oprnds2.release ();
5639 vec_oprnds3.release ();
5640
5641 return true;
5642 }
5643
5644
5645 /* Make sure the statement is vectorizable. */
5646
5647 bool
5648 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5649 {
5650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5652 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5653 bool ok;
5654 tree scalar_type, vectype;
5655 gimple pattern_stmt;
5656 gimple_seq pattern_def_seq;
5657
5658 if (dump_enabled_p ())
5659 {
5660 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5662 dump_printf (MSG_NOTE, "\n");
5663 }
5664
5665 if (gimple_has_volatile_ops (stmt))
5666 {
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5669 "not vectorized: stmt has volatile operands\n");
5670
5671 return false;
5672 }
5673
5674 /* Skip stmts that do not need to be vectorized. In loops this is expected
5675 to include:
5676 - the COND_EXPR which is the loop exit condition
5677 - any LABEL_EXPRs in the loop
5678 - computations that are used only for array indexing or loop control.
5679 In basic blocks we only analyze statements that are a part of some SLP
5680 instance, therefore, all the statements are relevant.
5681
5682    The pattern statement needs to be analyzed instead of the original
5683    statement if the original statement is not relevant.  Otherwise, we
5684    analyze both statements.  In basic blocks we are called from some SLP
5685    instance traversal, so we don't analyze pattern stmts here; the
5686    pattern stmts are already part of the SLP instance.  */
5687
5688 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5689 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5690 && !STMT_VINFO_LIVE_P (stmt_info))
5691 {
5692 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5693 && pattern_stmt
5694 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5695 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5696 {
5697 /* Analyze PATTERN_STMT instead of the original stmt. */
5698 stmt = pattern_stmt;
5699 stmt_info = vinfo_for_stmt (pattern_stmt);
5700 if (dump_enabled_p ())
5701 {
5702 dump_printf_loc (MSG_NOTE, vect_location,
5703 "==> examining pattern statement: ");
5704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5705 dump_printf (MSG_NOTE, "\n");
5706 }
5707 }
5708 else
5709 {
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
5712
5713 return true;
5714 }
5715 }
5716 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5717 && node == NULL
5718 && pattern_stmt
5719 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5720 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5721 {
5722 /* Analyze PATTERN_STMT too. */
5723 if (dump_enabled_p ())
5724 {
5725 dump_printf_loc (MSG_NOTE, vect_location,
5726 "==> examining pattern statement: ");
5727 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5728 dump_printf (MSG_NOTE, "\n");
5729 }
5730
5731 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5732 return false;
5733 }
5734
5735 if (is_pattern_stmt_p (stmt_info)
5736 && node == NULL
5737 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5738 {
5739 gimple_stmt_iterator si;
5740
5741 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5742 {
5743 gimple pattern_def_stmt = gsi_stmt (si);
5744 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5745 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5746 {
5747 /* Analyze def stmt of STMT if it's a pattern stmt. */
5748 if (dump_enabled_p ())
5749 {
5750 dump_printf_loc (MSG_NOTE, vect_location,
5751 "==> examining pattern def statement: ");
5752 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5753 dump_printf (MSG_NOTE, "\n");
5754 }
5755
5756 if (!vect_analyze_stmt (pattern_def_stmt,
5757 need_to_vectorize, node))
5758 return false;
5759 }
5760 }
5761 }
5762
5763 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5764 {
5765 case vect_internal_def:
5766 break;
5767
5768 case vect_reduction_def:
5769 case vect_nested_cycle:
5770 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5771 || relevance == vect_used_in_outer_by_reduction
5772 || relevance == vect_unused_in_scope));
5773 break;
5774
5775 case vect_induction_def:
5776 case vect_constant_def:
5777 case vect_external_def:
5778 case vect_unknown_def_type:
5779 default:
5780 gcc_unreachable ();
5781 }
5782
5783 if (bb_vinfo)
5784 {
5785 gcc_assert (PURE_SLP_STMT (stmt_info));
5786
5787 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5788 if (dump_enabled_p ())
5789 {
5790 dump_printf_loc (MSG_NOTE, vect_location,
5791 "get vectype for scalar type: ");
5792 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5793 dump_printf (MSG_NOTE, "\n");
5794 }
5795
5796 vectype = get_vectype_for_scalar_type (scalar_type);
5797 if (!vectype)
5798 {
5799 if (dump_enabled_p ())
5800 {
5801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5802 "not SLPed: unsupported data-type ");
5803 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5804 scalar_type);
5805 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5806 }
5807 return false;
5808 }
5809
5810 if (dump_enabled_p ())
5811 {
5812 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5813 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5814 dump_printf (MSG_NOTE, "\n");
5815 }
5816
5817 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5818 }
5819
5820 if (STMT_VINFO_RELEVANT_P (stmt_info))
5821 {
5822 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5823 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5824 *need_to_vectorize = true;
5825 }
5826
5827 ok = true;
5828 if (!bb_vinfo
5829 && (STMT_VINFO_RELEVANT_P (stmt_info)
5830 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5831 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5832 || vectorizable_shift (stmt, NULL, NULL, NULL)
5833 || vectorizable_operation (stmt, NULL, NULL, NULL)
5834 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5835 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5836 || vectorizable_call (stmt, NULL, NULL, NULL)
5837 || vectorizable_store (stmt, NULL, NULL, NULL)
5838 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5839 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5840 else
5841 {
5842 if (bb_vinfo)
5843 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5844 || vectorizable_shift (stmt, NULL, NULL, node)
5845 || vectorizable_operation (stmt, NULL, NULL, node)
5846 || vectorizable_assignment (stmt, NULL, NULL, node)
5847 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5848 || vectorizable_call (stmt, NULL, NULL, node)
5849 || vectorizable_store (stmt, NULL, NULL, node)
5850 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5851 }
5852
5853 if (!ok)
5854 {
5855 if (dump_enabled_p ())
5856 {
5857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5858 "not vectorized: relevant stmt not ");
5859 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5861 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5862 }
5863
5864 return false;
5865 }
5866
5867 if (bb_vinfo)
5868 return true;
5869
5870   /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5871 need extra handling, except for vectorizable reductions. */
5872 if (STMT_VINFO_LIVE_P (stmt_info)
5873 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5874 ok = vectorizable_live_operation (stmt, NULL, NULL);
5875
5876 if (!ok)
5877 {
5878 if (dump_enabled_p ())
5879 {
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "not vectorized: live stmt not ");
5882 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5883 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5884 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5885 }
5886
5887 return false;
5888 }
5889
5890 return true;
5891 }
5892
5893
5894 /* Function vect_transform_stmt.
5895
5896 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5897
5898 bool
5899 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5900 bool *grouped_store, slp_tree slp_node,
5901 slp_instance slp_node_instance)
5902 {
5903 bool is_store = false;
5904 gimple vec_stmt = NULL;
5905 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5906 bool done;
5907
5908 switch (STMT_VINFO_TYPE (stmt_info))
5909 {
5910 case type_demotion_vec_info_type:
5911 case type_promotion_vec_info_type:
5912 case type_conversion_vec_info_type:
5913 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5914 gcc_assert (done);
5915 break;
5916
5917 case induc_vec_info_type:
5918 gcc_assert (!slp_node);
5919 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5920 gcc_assert (done);
5921 break;
5922
5923 case shift_vec_info_type:
5924 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5925 gcc_assert (done);
5926 break;
5927
5928 case op_vec_info_type:
5929 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5930 gcc_assert (done);
5931 break;
5932
5933 case assignment_vec_info_type:
5934 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5935 gcc_assert (done);
5936 break;
5937
5938 case load_vec_info_type:
5939 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5940 slp_node_instance);
5941 gcc_assert (done);
5942 break;
5943
5944 case store_vec_info_type:
5945 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5946 gcc_assert (done);
5947 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5948 {
5949 /* In case of interleaving, the whole chain is vectorized when the
5950 last store in the chain is reached. Store stmts before the last
5951            one are skipped, and their vec_stmt_info shouldn't be freed
5952 meanwhile. */
5953 *grouped_store = true;
5954 if (STMT_VINFO_VEC_STMT (stmt_info))
5955 is_store = true;
5956 }
5957 else
5958 is_store = true;
5959 break;
5960
5961 case condition_vec_info_type:
5962 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5963 gcc_assert (done);
5964 break;
5965
5966 case call_vec_info_type:
5967 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5968 stmt = gsi_stmt (*gsi);
5969 break;
5970
5971 case reduc_vec_info_type:
5972 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5973 gcc_assert (done);
5974 break;
5975
5976 default:
5977 if (!STMT_VINFO_LIVE_P (stmt_info))
5978 {
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5981 "stmt not supported.\n");
5982 gcc_unreachable ();
5983 }
5984 }
5985
5986 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5987 is being vectorized, but outside the immediately enclosing loop. */
5988 if (vec_stmt
5989 && STMT_VINFO_LOOP_VINFO (stmt_info)
5990 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5991 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5992 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5993 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5994 || STMT_VINFO_RELEVANT (stmt_info) ==
5995 vect_used_in_outer_by_reduction))
5996 {
5997 struct loop *innerloop = LOOP_VINFO_LOOP (
5998 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5999 imm_use_iterator imm_iter;
6000 use_operand_p use_p;
6001 tree scalar_dest;
6002 gimple exit_phi;
6003
6004 if (dump_enabled_p ())
6005 dump_printf_loc (MSG_NOTE, vect_location,
6006 "Record the vdef for outer-loop vectorization.\n");
6007
6008       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6009 (to be used when vectorizing outer-loop stmts that use the DEF of
6010 STMT). */
6011 if (gimple_code (stmt) == GIMPLE_PHI)
6012 scalar_dest = PHI_RESULT (stmt);
6013 else
6014 scalar_dest = gimple_assign_lhs (stmt);
6015
6016 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6017 {
6018 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6019 {
6020 exit_phi = USE_STMT (use_p);
6021 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6022 }
6023 }
6024 }
6025
6026 /* Handle stmts whose DEF is used outside the loop-nest that is
6027 being vectorized. */
6028 if (STMT_VINFO_LIVE_P (stmt_info)
6029 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6030 {
6031 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6032 gcc_assert (done);
6033 }
6034
6035 if (vec_stmt)
6036 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6037
6038 return is_store;
6039 }
6040
6041
6042 /* Remove a group of stores (for SLP or interleaving), free their
6043 stmt_vec_info. */
6044
6045 void
6046 vect_remove_stores (gimple first_stmt)
6047 {
6048 gimple next = first_stmt;
6049 gimple tmp;
6050 gimple_stmt_iterator next_si;
6051
6052 while (next)
6053 {
6054 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6055
6056 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6057 if (is_pattern_stmt_p (stmt_info))
6058 next = STMT_VINFO_RELATED_STMT (stmt_info);
6059 /* Free the attached stmt_vec_info and remove the stmt. */
6060 next_si = gsi_for_stmt (next);
6061 unlink_stmt_vdef (next);
6062 gsi_remove (&next_si, true);
6063 release_defs (next);
6064 free_stmt_vec_info (next);
6065 next = tmp;
6066 }
6067 }
6068
6069
6070 /* Function new_stmt_vec_info.
6071
6072 Create and initialize a new stmt_vec_info struct for STMT. */
6073
6074 stmt_vec_info
6075 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
6076 bb_vec_info bb_vinfo)
6077 {
6078 stmt_vec_info res;
6079 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6080
6081 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6082 STMT_VINFO_STMT (res) = stmt;
6083 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
6084 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
6085 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
6086 STMT_VINFO_LIVE_P (res) = false;
6087 STMT_VINFO_VECTYPE (res) = NULL;
6088 STMT_VINFO_VEC_STMT (res) = NULL;
6089 STMT_VINFO_VECTORIZABLE (res) = true;
6090 STMT_VINFO_IN_PATTERN_P (res) = false;
6091 STMT_VINFO_RELATED_STMT (res) = NULL;
6092 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
6093 STMT_VINFO_DATA_REF (res) = NULL;
6094
6095 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6096 STMT_VINFO_DR_OFFSET (res) = NULL;
6097 STMT_VINFO_DR_INIT (res) = NULL;
6098 STMT_VINFO_DR_STEP (res) = NULL;
6099 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6100
6101 if (gimple_code (stmt) == GIMPLE_PHI
6102 && is_loop_header_bb_p (gimple_bb (stmt)))
6103 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6104 else
6105 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6106
6107 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
6108 STMT_SLP_TYPE (res) = loop_vect;
6109 GROUP_FIRST_ELEMENT (res) = NULL;
6110 GROUP_NEXT_ELEMENT (res) = NULL;
6111 GROUP_SIZE (res) = 0;
6112 GROUP_STORE_COUNT (res) = 0;
6113 GROUP_GAP (res) = 0;
6114 GROUP_SAME_DR_STMT (res) = NULL;
6115
6116 return res;
6117 }
6118
6119
6120 /* Create a vector for stmt_vec_info.  */
6121
6122 void
6123 init_stmt_vec_info_vec (void)
6124 {
6125 gcc_assert (!stmt_vec_info_vec.exists ());
6126 stmt_vec_info_vec.create (50);
6127 }
6128
6129
6130 /* Free the stmt_vec_info vector.  */
6131
6132 void
6133 free_stmt_vec_info_vec (void)
6134 {
6135 unsigned int i;
6136 vec_void_p info;
6137 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6138 if (info != NULL)
6139 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6140 gcc_assert (stmt_vec_info_vec.exists ());
6141 stmt_vec_info_vec.release ();
6142 }
6143
6144
6145 /* Free stmt vectorization related info. */
6146
6147 void
6148 free_stmt_vec_info (gimple stmt)
6149 {
6150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6151
6152 if (!stmt_info)
6153 return;
6154
6155 /* Check if this statement has a related "pattern stmt"
6156 (introduced by the vectorizer during the pattern recognition
6157 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6158 too. */
6159 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6160 {
6161 stmt_vec_info patt_info
6162 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6163 if (patt_info)
6164 {
6165 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6166 if (seq)
6167 {
6168 gimple_stmt_iterator si;
6169 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6170 free_stmt_vec_info (gsi_stmt (si));
6171 }
6172 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6173 }
6174 }
6175
6176 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6177 set_vinfo_for_stmt (stmt, NULL);
6178 free (stmt_info);
6179 }
6180
6181
6182 /* Function get_vectype_for_scalar_type_and_size.
6183
6184 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6185 by the target. */
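/* Illustrative example, not from the original sources and target
   dependent: for a 32-bit int SCALAR_TYPE and SIZE == 16 this yields a
   4-element integer vector type (16 bytes / 4 bytes per element),
   provided the target supports such a vector mode; with SIZE == 0 the
   target's preferred SIMD mode for SImode determines the width.  */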
6186
6187 static tree
6188 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6189 {
6190 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6191 enum machine_mode simd_mode;
6192 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6193 int nunits;
6194 tree vectype;
6195
6196 if (nbytes == 0)
6197 return NULL_TREE;
6198
6199 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6200 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6201 return NULL_TREE;
6202
6203 /* For vector types of elements whose mode precision doesn't
6204      match their type's precision we use an element type of mode
6205 precision. The vectorization routines will have to make sure
6206 they support the proper result truncation/extension.
6207 We also make sure to build vector types with INTEGER_TYPE
6208 component type only. */
6209 if (INTEGRAL_TYPE_P (scalar_type)
6210 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6211 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6212 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6213 TYPE_UNSIGNED (scalar_type));
6214
6215 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6216 When the component mode passes the above test simply use a type
6217 corresponding to that mode. The theory is that any use that
6218 would cause problems with this will disable vectorization anyway. */
6219 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6220 && !INTEGRAL_TYPE_P (scalar_type))
6221 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6222
6223 /* We can't build a vector type of elements with alignment bigger than
6224 their size. */
6225 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6226 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6227 TYPE_UNSIGNED (scalar_type));
6228
6229   /* If we fell back to using the mode, fail if there was
6230 no scalar type for it. */
6231 if (scalar_type == NULL_TREE)
6232 return NULL_TREE;
6233
6234 /* If no size was supplied use the mode the target prefers. Otherwise
6235 lookup a vector mode of the specified size. */
6236 if (size == 0)
6237 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6238 else
6239 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6240 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6241 if (nunits <= 1)
6242 return NULL_TREE;
6243
6244 vectype = build_vector_type (scalar_type, nunits);
6245
6246 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6247 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6248 return NULL_TREE;
6249
6250 return vectype;
6251 }
6252
6253 unsigned int current_vector_size;
6254
6255 /* Function get_vectype_for_scalar_type.
6256
6257 Returns the vector type corresponding to SCALAR_TYPE as supported
6258 by the target. */
6259
6260 tree
6261 get_vectype_for_scalar_type (tree scalar_type)
6262 {
6263 tree vectype;
6264 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6265 current_vector_size);
6266 if (vectype
6267 && current_vector_size == 0)
6268 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6269 return vectype;
6270 }
6271
6272 /* Function get_same_sized_vectype
6273
6274 Returns a vector type corresponding to SCALAR_TYPE of size
6275 VECTOR_TYPE if supported by the target. */
6276
6277 tree
6278 get_same_sized_vectype (tree scalar_type, tree vector_type)
6279 {
6280 return get_vectype_for_scalar_type_and_size
6281 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6282 }
6283
6284 /* Function vect_is_simple_use.
6285
6286 Input:
6287 LOOP_VINFO - the vect info of the loop that is being vectorized.
6288 BB_VINFO - the vect info of the basic block that is being vectorized.
6289 OPERAND - operand of STMT in the loop or bb.
6290 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6291
6292 Returns whether a stmt with OPERAND can be vectorized.
6293 For loops, supportable operands are constants, loop invariants, and operands
6294 that are defined by the current iteration of the loop. Unsupportable
6295 operands are those that are defined by a previous iteration of the loop (as
6296 is the case in reduction/induction computations).
6297 For basic blocks, supportable operands are constants and bb invariants.
6298 For now, operands defined outside the basic block are not supported. */
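/* Illustrative examples, not from the original sources: for a loop
   statement  a_1 = b_2 + 3,  the operand 3 is classified as
   vect_constant_def, an operand defined before the loop as
   vect_external_def, and b_2, when defined by another statement of the
   same loop body, as vect_internal_def.  */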
6299
6300 bool
6301 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6302 bb_vec_info bb_vinfo, gimple *def_stmt,
6303 tree *def, enum vect_def_type *dt)
6304 {
6305 basic_block bb;
6306 stmt_vec_info stmt_vinfo;
6307 struct loop *loop = NULL;
6308
6309 if (loop_vinfo)
6310 loop = LOOP_VINFO_LOOP (loop_vinfo);
6311
6312 *def_stmt = NULL;
6313 *def = NULL_TREE;
6314
6315 if (dump_enabled_p ())
6316 {
6317 dump_printf_loc (MSG_NOTE, vect_location,
6318 "vect_is_simple_use: operand ");
6319 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6320 dump_printf (MSG_NOTE, "\n");
6321 }
6322
6323 if (CONSTANT_CLASS_P (operand))
6324 {
6325 *dt = vect_constant_def;
6326 return true;
6327 }
6328
6329 if (is_gimple_min_invariant (operand))
6330 {
6331 *def = operand;
6332 *dt = vect_external_def;
6333 return true;
6334 }
6335
6336 if (TREE_CODE (operand) == PAREN_EXPR)
6337 {
6338 if (dump_enabled_p ())
6339 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
6340 operand = TREE_OPERAND (operand, 0);
6341 }
6342
6343 if (TREE_CODE (operand) != SSA_NAME)
6344 {
6345 if (dump_enabled_p ())
6346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6347 "not ssa-name.\n");
6348 return false;
6349 }
6350
6351 *def_stmt = SSA_NAME_DEF_STMT (operand);
6352 if (*def_stmt == NULL)
6353 {
6354 if (dump_enabled_p ())
6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6356 "no def_stmt.\n");
6357 return false;
6358 }
6359
6360 if (dump_enabled_p ())
6361 {
6362 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6363 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6364 dump_printf (MSG_NOTE, "\n");
6365 }
6366
6367   /* An empty stmt is expected only in the case of a function argument
6368      (otherwise we expect a PHI node or a GIMPLE_ASSIGN).  */
6369 if (gimple_nop_p (*def_stmt))
6370 {
6371 *def = operand;
6372 *dt = vect_external_def;
6373 return true;
6374 }
6375
6376 bb = gimple_bb (*def_stmt);
6377
6378 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6379 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6380 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6381 *dt = vect_external_def;
6382 else
6383 {
6384 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6385 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6386 }
6387
6388 if (*dt == vect_unknown_def_type
6389 || (stmt
6390 && *dt == vect_double_reduction_def
6391 && gimple_code (stmt) != GIMPLE_PHI))
6392 {
6393 if (dump_enabled_p ())
6394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6395 "Unsupported pattern.\n");
6396 return false;
6397 }
6398
6399 if (dump_enabled_p ())
6400 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
6401
6402 switch (gimple_code (*def_stmt))
6403 {
6404 case GIMPLE_PHI:
6405 *def = gimple_phi_result (*def_stmt);
6406 break;
6407
6408 case GIMPLE_ASSIGN:
6409 *def = gimple_assign_lhs (*def_stmt);
6410 break;
6411
6412 case GIMPLE_CALL:
6413 *def = gimple_call_lhs (*def_stmt);
6414 if (*def != NULL)
6415 break;
6416 /* FALLTHRU */
6417 default:
6418 if (dump_enabled_p ())
6419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6420 "unsupported defining stmt:\n");
6421 return false;
6422 }
6423
6424 return true;
6425 }
6426
6427 /* Function vect_is_simple_use_1.
6428
6429    Same as vect_is_simple_use but also determines the vector operand
6430 type of OPERAND and stores it to *VECTYPE. If the definition of
6431 OPERAND is vect_uninitialized_def, vect_constant_def or
6432 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6433    is responsible for computing the best suited vector type for the
6434 scalar operand. */
6435
6436 bool
6437 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6438 bb_vec_info bb_vinfo, gimple *def_stmt,
6439 tree *def, enum vect_def_type *dt, tree *vectype)
6440 {
6441 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6442 def, dt))
6443 return false;
6444
6445 /* Now get a vector type if the def is internal, otherwise supply
6446 NULL_TREE and leave it up to the caller to figure out a proper
6447 type for the use stmt. */
6448 if (*dt == vect_internal_def
6449 || *dt == vect_induction_def
6450 || *dt == vect_reduction_def
6451 || *dt == vect_double_reduction_def
6452 || *dt == vect_nested_cycle)
6453 {
6454 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6455
6456 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6457 && !STMT_VINFO_RELEVANT (stmt_info)
6458 && !STMT_VINFO_LIVE_P (stmt_info))
6459 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6460
6461 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6462 gcc_assert (*vectype != NULL_TREE);
6463 }
6464 else if (*dt == vect_uninitialized_def
6465 || *dt == vect_constant_def
6466 || *dt == vect_external_def)
6467 *vectype = NULL_TREE;
6468 else
6469 gcc_unreachable ();
6470
6471 return true;
6472 }
6473
6474
6475 /* Function supportable_widening_operation
6476
6477 Check whether an operation represented by the code CODE is a
6478 widening operation that is supported by the target platform in
6479 vector form (i.e., when operating on arguments of type VECTYPE_IN
6480 producing a result of type VECTYPE_OUT).
6481
6482    Widening operations we currently support are NOP (CONVERT), FLOAT,
6483    WIDEN_MULT and WIDEN_LSHIFT.  This function checks if they are supported
6484 by the target platform either directly (via vector tree-codes), or via
6485 target builtins.
6486
6487 Output:
6488 - CODE1 and CODE2 are codes of vector operations to be used when
6489 vectorizing the operation, if available.
6490 - MULTI_STEP_CVT determines the number of required intermediate steps in
6491 case of multi-step conversion (like char->short->int - in that case
6492 MULTI_STEP_CVT will be 1).
6493 - INTERM_TYPES contains the intermediate type required to perform the
6494 widening operation (short in the above example). */
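/* Illustrative example, not from the original sources: widening a
   vector of chars to ints (char->short->int) needs one intermediate
   short-vector step, so on a target providing the relevant optabs this
   returns CODE1/CODE2 = VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR
   (possibly swapped on big-endian targets), MULTI_STEP_CVT = 1 and
   INTERM_TYPES = { the short vector type }.  */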
6495
6496 bool
6497 supportable_widening_operation (enum tree_code code, gimple stmt,
6498 tree vectype_out, tree vectype_in,
6499 enum tree_code *code1, enum tree_code *code2,
6500 int *multi_step_cvt,
6501 vec<tree> *interm_types)
6502 {
6503 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6504 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6505 struct loop *vect_loop = NULL;
6506 enum machine_mode vec_mode;
6507 enum insn_code icode1, icode2;
6508 optab optab1, optab2;
6509 tree vectype = vectype_in;
6510 tree wide_vectype = vectype_out;
6511 enum tree_code c1, c2;
6512 int i;
6513 tree prev_type, intermediate_type;
6514 enum machine_mode intermediate_mode, prev_mode;
6515 optab optab3, optab4;
6516
6517 *multi_step_cvt = 0;
6518 if (loop_info)
6519 vect_loop = LOOP_VINFO_LOOP (loop_info);
6520
6521 switch (code)
6522 {
6523 case WIDEN_MULT_EXPR:
6524 /* The result of a vectorized widening operation usually requires
6525 two vectors (because the widened results do not fit into one vector).
6526 The generated vector results would normally be expected to be
6527 generated in the same order as in the original scalar computation,
6528 i.e. if 8 results are generated in each vector iteration, they are
6529 to be organized as follows:
6530 vect1: [res1,res2,res3,res4],
6531 vect2: [res5,res6,res7,res8].
6532
6533 However, in the special case that the result of the widening
6534 operation is used in a reduction computation only, the order doesn't
6535 matter (because when vectorizing a reduction we change the order of
6536 the computation). Some targets can take advantage of this and
6537 generate more efficient code. For example, targets like Altivec,
6538 that support widen_mult using a sequence of {mult_even,mult_odd}
6539 generate the following vectors:
6540 vect1: [res1,res3,res5,res7],
6541 vect2: [res2,res4,res6,res8].
6542
6543 When vectorizing outer-loops, we execute the inner-loop sequentially
6544 (each vectorized inner-loop iteration contributes to VF outer-loop
6545        iterations in parallel).  We therefore don't allow changing the
6546 order of the computation in the inner-loop during outer-loop
6547 vectorization. */
6548 /* TODO: Another case in which order doesn't *really* matter is when we
6549 widen and then contract again, e.g. (short)((int)x * y >> 8).
6550 Normally, pack_trunc performs an even/odd permute, whereas the
6551 repack from an even/odd expansion would be an interleave, which
6552 would be significantly simpler for e.g. AVX2. */
6553 /* In any case, in order to avoid duplicating the code below, recurse
6554 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6555 are properly set up for the caller. If we fail, we'll continue with
6556 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6557 if (vect_loop
6558 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6559 && !nested_in_vect_loop_p (vect_loop, stmt)
6560 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6561 stmt, vectype_out, vectype_in,
6562 code1, code2, multi_step_cvt,
6563 interm_types))
6564 return true;
6565 c1 = VEC_WIDEN_MULT_LO_EXPR;
6566 c2 = VEC_WIDEN_MULT_HI_EXPR;
6567 break;
6568
6569 case VEC_WIDEN_MULT_EVEN_EXPR:
6570 /* Support the recursion induced just above. */
6571 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6572 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6573 break;
6574
6575 case WIDEN_LSHIFT_EXPR:
6576 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6577 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6578 break;
6579
6580 CASE_CONVERT:
6581 c1 = VEC_UNPACK_LO_EXPR;
6582 c2 = VEC_UNPACK_HI_EXPR;
6583 break;
6584
6585 case FLOAT_EXPR:
6586 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6587 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6588 break;
6589
6590 case FIX_TRUNC_EXPR:
6591 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6592 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6593 computing the operation. */
6594 return false;
6595
6596 default:
6597 gcc_unreachable ();
6598 }
6599
6600 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6601 {
6602 enum tree_code ctmp = c1;
6603 c1 = c2;
6604 c2 = ctmp;
6605 }
6606
6607 if (code == FIX_TRUNC_EXPR)
6608 {
6609       /* The signedness is determined from the output operand.  */
6610 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6611 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6612 }
6613 else
6614 {
6615 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6616 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6617 }
6618
6619 if (!optab1 || !optab2)
6620 return false;
6621
6622 vec_mode = TYPE_MODE (vectype);
6623 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6624 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6625 return false;
6626
6627 *code1 = c1;
6628 *code2 = c2;
6629
6630 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6631 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6632 return true;
6633
6634 /* Check if it's a multi-step conversion that can be done using intermediate
6635 types. */
6636
6637 prev_type = vectype;
6638 prev_mode = vec_mode;
6639
6640 if (!CONVERT_EXPR_CODE_P (code))
6641 return false;
6642
6643   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6644      intermediate steps in the promotion sequence.  We try
6645      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6646      not.  */
6647 interm_types->create (MAX_INTERM_CVT_STEPS);
6648 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6649 {
6650 intermediate_mode = insn_data[icode1].operand[0].mode;
6651 intermediate_type
6652 = lang_hooks.types.type_for_mode (intermediate_mode,
6653 TYPE_UNSIGNED (prev_type));
6654 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6655 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6656
6657 if (!optab3 || !optab4
6658 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6659 || insn_data[icode1].operand[0].mode != intermediate_mode
6660 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6661 || insn_data[icode2].operand[0].mode != intermediate_mode
6662 || ((icode1 = optab_handler (optab3, intermediate_mode))
6663 == CODE_FOR_nothing)
6664 || ((icode2 = optab_handler (optab4, intermediate_mode))
6665 == CODE_FOR_nothing))
6666 break;
6667
6668 interm_types->quick_push (intermediate_type);
6669 (*multi_step_cvt)++;
6670
6671 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6672 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6673 return true;
6674
6675 prev_type = intermediate_type;
6676 prev_mode = intermediate_mode;
6677 }
6678
6679 interm_types->release ();
6680 return false;
6681 }
6682
6683
6684 /* Function supportable_narrowing_operation
6685
6686 Check whether an operation represented by the code CODE is a
6687 narrowing operation that is supported by the target platform in
6688 vector form (i.e., when operating on arguments of type VECTYPE_IN
6689 and producing a result of type VECTYPE_OUT).
6690
6691 Narrowing operations we currently support are NOP (CONVERT) and
6692 FIX_TRUNC. This function checks if these operations are supported by
6693 the target platform directly via vector tree-codes.
6694
6695 Output:
6696 - CODE1 is the code of a vector operation to be used when
6697 vectorizing the operation, if available.
6698 - MULTI_STEP_CVT determines the number of required intermediate steps in
6699 case of multi-step conversion (like int->short->char - in that case
6700 MULTI_STEP_CVT will be 1).
6701 - INTERM_TYPES contains the intermediate type required to perform the
6702 narrowing operation (short in the above example). */
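/* Illustrative example, not from the original sources: narrowing a
   vector of ints to chars (int->short->char) needs one intermediate
   short-vector step, so on a target providing the relevant optabs this
   returns CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1 and
   INTERM_TYPES = { the short vector type }.  */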
6703
6704 bool
6705 supportable_narrowing_operation (enum tree_code code,
6706 tree vectype_out, tree vectype_in,
6707 enum tree_code *code1, int *multi_step_cvt,
6708 vec<tree> *interm_types)
6709 {
6710 enum machine_mode vec_mode;
6711 enum insn_code icode1;
6712 optab optab1, interm_optab;
6713 tree vectype = vectype_in;
6714 tree narrow_vectype = vectype_out;
6715 enum tree_code c1;
6716 tree intermediate_type;
6717 enum machine_mode intermediate_mode, prev_mode;
6718 int i;
6719 bool uns;
6720
6721 *multi_step_cvt = 0;
6722 switch (code)
6723 {
6724 CASE_CONVERT:
6725 c1 = VEC_PACK_TRUNC_EXPR;
6726 break;
6727
6728 case FIX_TRUNC_EXPR:
6729 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6730 break;
6731
6732 case FLOAT_EXPR:
6733 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6734 tree code and optabs used for computing the operation. */
6735 return false;
6736
6737 default:
6738 gcc_unreachable ();
6739 }
6740
6741 if (code == FIX_TRUNC_EXPR)
6742     /* The signedness is determined from the output operand.  */
6743 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6744 else
6745 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6746
6747 if (!optab1)
6748 return false;
6749
6750 vec_mode = TYPE_MODE (vectype);
6751 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6752 return false;
6753
6754 *code1 = c1;
6755
6756 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6757 return true;
6758
6759 /* Check if it's a multi-step conversion that can be done using intermediate
6760 types. */
6761 prev_mode = vec_mode;
6762 if (code == FIX_TRUNC_EXPR)
6763 uns = TYPE_UNSIGNED (vectype_out);
6764 else
6765 uns = TYPE_UNSIGNED (vectype);
6766
6767 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6768 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6769 costly than signed. */
6770 if (code == FIX_TRUNC_EXPR && uns)
6771 {
6772 enum insn_code icode2;
6773
6774 intermediate_type
6775 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6776 interm_optab
6777 = optab_for_tree_code (c1, intermediate_type, optab_default);
6778 if (interm_optab != unknown_optab
6779 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6780 && insn_data[icode1].operand[0].mode
6781 == insn_data[icode2].operand[0].mode)
6782 {
6783 uns = false;
6784 optab1 = interm_optab;
6785 icode1 = icode2;
6786 }
6787 }
6788
6789   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6790      intermediate steps in the narrowing sequence.  We try
6791      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
6792 interm_types->create (MAX_INTERM_CVT_STEPS);
6793 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6794 {
6795 intermediate_mode = insn_data[icode1].operand[0].mode;
6796 intermediate_type
6797 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6798 interm_optab
6799 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6800 optab_default);
6801 if (!interm_optab
6802 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6803 || insn_data[icode1].operand[0].mode != intermediate_mode
6804 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6805 == CODE_FOR_nothing))
6806 break;
6807
6808 interm_types->quick_push (intermediate_type);
6809 (*multi_step_cvt)++;
6810
6811 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6812 return true;
6813
6814 prev_mode = intermediate_mode;
6815 optab1 = interm_optab;
6816 }
6817
6818 interm_types->release ();
6819 return false;
6820 }