/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "recog.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
#include "toplev.h"
#include "real.h"

/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
static tree get_initial_def_for_reduction (tree, tree, tree *);

/* Utility functions dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);


/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme prepends
   the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
   VAR_KIND) to NAME if provided, and uses the prefix alone otherwise.  */
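
/* As an illustrative sketch (the names below are hypothetical, not taken
   from a real dump): a call such as
       vect_get_new_vect_var (ptr_type, vect_pointer_var, "a")
   creates a temporary whose name starts with "vect_pa", while passing a
   NULL NAME yields a temporary named from the prefix alone ("vect_p"),
   in both cases followed by the usual uniquifying suffix that
   create_tmp_var appends.  */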

static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
    {
    case vect_simple_var:
      prefix = "vect_";
      break;
    case vect_scalar_var:
      prefix = "stmp_";
      break;
    case vect_pointer_var:
      prefix = "vect_p";
      break;
    default:
      gcc_unreachable ();
    }

  if (name)
    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
  else
    new_vect_var = create_tmp_var (type, prefix);

  /* Mark vector-typed variables as gimple register variables.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (new_vect_var) = true;

  return new_vect_var;
}


/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */
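
/* A sketch of the result, for a hypothetical data reference a[i] with
   DR_BASE_ADDRESS = &a, DR_OFFSET = 0 and DR_INIT = 0: with no OFFSET
   argument the returned SSA_NAME evaluates to (vectype *) &a; with
   OFFSET = off it evaluates to (vectype *) (&a + off * DR_STEP)
   (ignoring the interleaving adjustment to the step, handled below),
   the computing stmts having been appended to NEW_STMT_LIST.  */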

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;

  /* Create base_offset.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      tree step;

      /* For interleaved accesses we divide STEP by the size of the
         interleaving group.  */
      if (DR_GROUP_SIZE (stmt_info))
        step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
                            build_int_cst (TREE_TYPE (offset),
                                           DR_GROUP_SIZE (stmt_info)));
      else
        step = DR_STEP (dr);

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset.  */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
                           base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base.  */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return vec_stmt;
}


/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp), that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         GIMPLE_MODIFY_STMT <name, data-ref> or
         GIMPLE_MODIFY_STMT <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.
   5. TYPE: if not NULL indicates the required type of the data-ref.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Return the pointer.  */
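
/* As an illustration of output (2) (hypothetical SSA names): when
   ONLY_INIT is false, create_iv is used below to build a def-use cycle
   roughly of the form

        loop:
          vp_1 = phi <vp_0, vp_2>
          ... data-ref accesses based on vp_1 ...
          vp_2 = vp_1 + TYPE_SIZE_UNIT (vectype)

   so that the pointer advances by one vector per iteration.  */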

static tree
vect_create_data_ref_ptr (tree stmt,
                          block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                          tree offset, tree *initial_address, tree *ptr_incr,
                          bool only_init, tree type)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, " vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, " vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/
  if (type)
    vect_ptr_type = build_pointer_type (type);
  else
    vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);

  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_SYMBOL_TAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with TAG added to its may-alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    set_symbol_mem_tag (vect_ptr, tag);

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset])) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);


  /** (4) Handle the updating of the vector-pointer inside the loop:  **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
                 NULL_TREE, loop, &incr_bsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr),
                     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      return indx_before_incr;
    }
}


/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  Connect the new
   increment stmt to the existing def-use update-chain of the pointer.

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + vector_size
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
              The increment amount across iterations is also expected to be
              vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */

static tree
bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
                 tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vptr_type = TREE_TYPE (dataref_ptr);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
  tree incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  incr_stmt = build_gimple_modify_stmt (ptr_var,
                                        build2 (PLUS_EXPR, vptr_type,
                                                dataref_ptr, update));
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
  vect_finish_stmt_generation (stmt, incr_stmt, bsi);

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
  merge_alias_info (new_dataref_ptr, dataref_ptr);

  return new_dataref_ptr;
}


/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */
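
/* For example (hypothetical names): given a scalar destination 'x' and a
   vector type V4SI, this creates a temporary named "vect_x" of type V4SI;
   if VECTYPE is NULL it instead creates a scalar temporary "stmp_x" of
   x's own type.  */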

static tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Return the DEF of INIT_STMT.  It will
   be used in the vectorization of STMT.  */
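
/* As a sketch (hypothetical values): for VECTOR_VAR = {3,3,3,3} of type
   V4SI, a stmt of the form

        vect_cst_.N = {3,3,3,3};

   is inserted on the loop-preheader edge, and the SSA_NAME it defines is
   returned.  */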

static tree
vect_init_vector (tree stmt, tree vector_var, tree vector_type)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree new_var;
  tree init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);

  init_stmt = build_gimple_modify_stmt (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
  return vec_oprnd;
}


/* Function get_initial_def_for_induction

   Input:
   IV_PHI - the loop-header phi of the scalar induction variable.

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with IV_PHI='X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */

static tree
get_initial_def_for_induction (tree iv_phi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  block_stmt_iterator bsi;
  tree vec, vec_init, vec_step, t;
  tree access_fn;
  tree new_var;
  tree new_name;
  tree init_stmt;
  tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
  tree init_expr, step_expr;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int i;
  bool ok;
  int ncopies = vf / nunits;
  tree expr;
  stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
  tree stmts;
  tree stmt = NULL_TREE;
  block_stmt_iterator si;
  basic_block bb = bb_for_stmt (iv_phi);

  gcc_assert (phi_info);
  gcc_assert (ncopies >= 1);

  /* Find the first insertion point in the BB.  */
  si = bsi_after_labels (bb);
  stmt = bsi_stmt (si);

  access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
  gcc_assert (access_fn);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn,
                                    &init_expr, &step_expr);
  gcc_assert (ok);

  /* Create the vector that holds the initial_value of the induction.  */
  new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
  add_referenced_var (new_var);

  new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
  if (stmts)
    {
      new_bb = bsi_insert_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  t = NULL_TREE;
  t = tree_cons (NULL_TREE, new_name, t);
  for (i = 1; i < nunits; i++)
    {
      tree tmp;

      /* Create: new_name = new_name + step_expr.  */
      tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
      init_stmt = build_gimple_modify_stmt (new_var, tmp);
      new_name = make_ssa_name (new_var, init_stmt);
      GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;

      new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
      gcc_assert (!new_bb);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created new init_stmt: ");
          print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
        }
      t = tree_cons (NULL_TREE, new_name, t);
    }
  vec = build_constructor_from_list (vectype, nreverse (t));
  vec_init = vect_init_vector (stmt, vec, vectype);


  /* Create the vector that holds the step of the induction.  */
  expr = build_int_cst (scalar_type, vf);
  new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
  t = NULL_TREE;
  for (i = 0; i < nunits; i++)
    t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
  vec = build_constructor_from_list (vectype, t);
  vec_step = vect_init_vector (stmt, vec, vectype);


  /* Create the following def-use cycle:
     loop prolog:
         vec_init = [X, X+S, X+2*S, X+3*S]
         vec_step = [VF*S, VF*S, VF*S, VF*S]
     loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         STMT
         ...
         vec_loop = vec_iv + vec_step;  */

  /* Create the induction-phi that defines the induction-operand.  */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
  add_referenced_var (vec_dest);
  induction_phi = create_phi_node (vec_dest, loop->header);
  set_stmt_info (get_stmt_ann (induction_phi),
                 new_stmt_vec_info (induction_phi, loop_vinfo));
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop.  */
  new_stmt = build_gimple_modify_stmt (NULL_TREE,
                                       build2 (PLUS_EXPR, vectype,
                                               induc_def, vec_step));
  vec_def = make_ssa_name (vec_dest, new_stmt);
  GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
  bsi = bsi_for_stmt (stmt);
  vect_finish_stmt_generation (stmt, new_stmt, &bsi);

  /* Set the arguments of the phi node:  */
  add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));


  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  if (ncopies > 1)
    {
      stmt_vec_info prev_stmt_vinfo;

      /* Create the vector that holds the step of the induction.  */
      expr = build_int_cst (scalar_type, nunits);
      new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
      t = NULL_TREE;
      for (i = 0; i < nunits; i++)
        t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
      vec = build_constructor_from_list (vectype, t);
      vec_step = vect_init_vector (stmt, vec, vectype);

      vec_def = induc_def;
      prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
      for (i = 1; i < ncopies; i++)
        {
          tree tmp;

          /* vec_i = vec_prev + vec_{step*nunits}.  */
          tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
          new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
          bsi = bsi_for_stmt (stmt);
          vect_finish_stmt_generation (stmt, new_stmt, &bsi);

          STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
          prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform induction: created def-use cycle:");
      print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
    }

  STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
  return induc_def;
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */
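
/* For instance (hypothetical operand): if OP is the constant 3 and the
   relevant vectype is V4SI, a preheader stmt 'vect_cst_.N = {3,3,3,3}'
   is created via vect_init_vector (case 1 below) and its def is returned;
   if OP is defined inside the loop by a stmt S, the def of S's vectorized
   counterpart, STMT_VINFO_VEC_STMT (S), is returned (case 3 below).  */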

static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        vec_cst = build_vector (vector_type, t);

        return vect_init_vector (stmt, vec_cst, vector_type);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop.  */
        return get_initial_def_for_induction (def_stmt);
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  tree vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_invariant_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);

  return vec_oprnd;
}


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  set_stmt_info (get_stmt_ann (vec_stmt),
                 new_stmt_vec_info (vec_stmt, loop_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));

#ifdef USE_MAPPED_LOCATION
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
#else
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
#endif
}


/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we are using the 'adjust in epilog' scheme, because this way the
   initialization vector is simpler (same element in all entries).
   A cost model should help decide between these two schemes.  */

static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree vecdef;
  tree def_for_init;
  tree init_def;
  tree t = NULL_TREE;
  int i;
  tree vector_type;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
  vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case PLUS_EXPR:
      *adjustment_def = init_val;
      /* Create a vector of zeros for init_def.  */
      if (INTEGRAL_TYPE_P (type))
        def_for_init = build_int_cst (type, 0);
      else
        def_for_init = build_real (type, dconst0);
      for (i = nunits - 1; i >= 0; --i)
        t = tree_cons (NULL_TREE, def_for_init, t);
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
      init_def = build_vector (vector_type, t);
      break;

    case MIN_EXPR:
    case MAX_EXPR:
      *adjustment_def = NULL_TREE;
      init_def = vecdef;
      break;

    default:
      gcc_unreachable ();
    }

  return init_def;
}


/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is VECT_DEF - the vector of partial sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */

static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree use_stmt;
  tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
  int op_type;

  op_type = TREE_OPERAND_LENGTH (operation);
  reduction_op = TREE_OPERAND (operation, op_type-1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }


  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.

   ***/
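
  /* A concrete sketch of scheme 2, for a hypothetical 128-bit vector of
     four 32-bit ints (so the shift amounts below are 64 and 32 bits):

          v' = vec_shift <v, 64>;   v = vop <v, v'>;
          v' = vec_shift <v, 32>;   v = vop <v, v'>;
          s  = extract_field <v, bitpos>;

     i.e. log2(nunits) shift-and-op rounds, after which one element (which
     one depends on endianness; see the BYTES_BIG_ENDIAN handling below)
     holds the reduced result.  */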
1170
1171 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1172 v_out1 = phi <v_loop> */
1173
1174 exit_bb = single_exit (loop)->dest;
1175 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1176 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
1177 exit_bsi = bsi_after_labels (exit_bb);
1178
1179 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1180 (i.e. when reduc_code is not available) and in the final adjustment
1181 code (if needed). Also get the original scalar reduction variable as
1182 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1183 represents a reduction pattern), the tree-code and scalar-def are
1184 taken from the original stmt that the pattern-stmt (STMT) replaces.
1185 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1186 are taken from STMT. */
1187
1188 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1189 if (!orig_stmt)
1190 {
1191 /* Regular reduction */
1192 orig_stmt = stmt;
1193 }
1194 else
1195 {
1196 /* Reduction pattern */
1197 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1198 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1199 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1200 }
1201 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1202 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
1203 scalar_type = TREE_TYPE (scalar_dest);
1204 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1205 bitsize = TYPE_SIZE (scalar_type);
1206 bytesize = TYPE_SIZE_UNIT (scalar_type);
1207
1208 /* 2.3 Create the reduction code, using one of the three schemes described
1209 above. */
1210
1211 if (reduc_code < NUM_TREE_CODES)
1212 {
1213 tree tmp;
1214
1215 /*** Case 1: Create:
1216 v_out2 = reduc_expr <v_out1> */
1217
1218 if (vect_print_dump_info (REPORT_DETAILS))
1219 fprintf (vect_dump, "Reduce using direct vector reduction.");
1220
1221 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1222 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
1223 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1224 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1225 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1226 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1227
1228 extract_scalar_result = true;
1229 }
1230 else
1231 {
1232 enum tree_code shift_code = 0;
1233 bool have_whole_vector_shift = true;
1234 int bit_offset;
1235 int element_bitsize = tree_low_cst (bitsize, 1);
1236 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1237 tree vec_temp;
1238
1239 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1240 shift_code = VEC_RSHIFT_EXPR;
1241 else
1242 have_whole_vector_shift = false;
1243
1244 /* Regardless of whether we have a whole vector shift, if we're
1245 emulating the operation via tree-vect-generic, we don't want
1246 to use it. Only the first round of the reduction is likely
1247 to still be profitable via emulation. */
1248 /* ??? It might be better to emit a reduction tree code here, so that
1249 tree-vect-generic can expand the first round via bit tricks. */
1250 if (!VECTOR_MODE_P (mode))
1251 have_whole_vector_shift = false;
1252 else
1253 {
1254 optab optab = optab_for_tree_code (code, vectype);
1255 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1256 have_whole_vector_shift = false;
1257 }
1258
1259 if (have_whole_vector_shift)
1260 {
1261 /*** Case 2: Create:
1262 for (offset = VS/2; offset >= element_size; offset/=2)
1263 {
1264 Create: va' = vec_shift <va, offset>
1265 Create: va = vop <va, va'>
1266 } */
1267
1268 if (vect_print_dump_info (REPORT_DETAILS))
1269 fprintf (vect_dump, "Reduce using vector shifts");
1270
1271 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1272 new_temp = PHI_RESULT (new_phi);
1273
1274 for (bit_offset = vec_size_in_bits/2;
1275 bit_offset >= element_bitsize;
1276 bit_offset /= 2)
1277 {
1278 tree bitpos = size_int (bit_offset);
1279 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1280 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1281 new_name = make_ssa_name (vec_dest, epilog_stmt);
1282 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1283 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1284
1285 tmp = build2 (code, vectype, new_name, new_temp);
1286 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1287 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1288 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1289 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1290 }
1291
1292 extract_scalar_result = true;
1293 }
1294 else
1295 {
1296 tree rhs;
1297
1298 /*** Case 3: Create:
1299 s = extract_field <v_out2, 0>
1300 for (offset = element_size;
1301 offset < vector_size;
1302 offset += element_size;)
1303 {
1304 Create: s' = extract_field <v_out2, offset>
1305 Create: s = op <s, s'>
1306 } */
1307
1308 if (vect_print_dump_info (REPORT_DETAILS))
1309 fprintf (vect_dump, "Reduce using scalar code. ");
1310
1311 vec_temp = PHI_RESULT (new_phi);
1312 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1313 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1314 bitsize_zero_node);
1315 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1316 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1317 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1318 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1319 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1320
1321 for (bit_offset = element_bitsize;
1322 bit_offset < vec_size_in_bits;
1323 bit_offset += element_bitsize)
1324 {
1325 tree tmp;
1326 tree bitpos = bitsize_int (bit_offset);
1327 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1328 bitpos);
1329
1330 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1331 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1332 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1333 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1334 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1335
1336 tmp = build2 (code, scalar_type, new_name, new_temp);
1337 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1338 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1339 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1340 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1341 }
1342
1343 extract_scalar_result = false;
1344 }
1345 }
1346
1347 /* 2.4 Extract the final scalar result. Create:
1348 s_out3 = extract_field <v_out2, bitpos> */
1349
1350 if (extract_scalar_result)
1351 {
1352 tree rhs;
1353
1354 if (vect_print_dump_info (REPORT_DETAILS))
1355 fprintf (vect_dump, "extract scalar result");
1356
1357 if (BYTES_BIG_ENDIAN)
1358 bitpos = size_binop (MULT_EXPR,
1359 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1360 TYPE_SIZE (scalar_type));
1361 else
1362 bitpos = bitsize_zero_node;
1363
1364 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1365 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1366 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1367 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1368 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1369 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1370 }
1371
1372 /* 2.4 Adjust the final result by the initial value of the reduction
1373 variable. (When such adjustment is not needed, then
1374 'scalar_initial_def' is zero).
1375
1376 Create:
1377 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1378
1379 if (scalar_initial_def)
1380 {
1381 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1382 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1383 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1384 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1385 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1386 }
1387
1388 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1389
1390 /* Find the loop-closed-use at the loop exit of the original scalar result.
1391 (The reduction result is expected to have two immediate uses - one at the
1392 latch block, and one at the loop exit). */
1393 exit_phi = NULL;
1394 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1395 {
1396 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1397 {
1398 exit_phi = USE_STMT (use_p);
1399 break;
1400 }
1401 }
1402 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1403 gcc_assert (exit_phi);
1404 /* Replace the uses: */
1405 orig_name = PHI_RESULT (exit_phi);
1406 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1407 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1408 SET_USE (use_p, new_temp);
1409 }
1410
1411
1412 /* Function vectorizable_reduction.
1413
1414 Check if STMT performs a reduction operation that can be vectorized.
1415 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1416 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1417 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1418
1419 This function also handles reduction idioms (patterns) that have been
1420 recognized in advance during vect_pattern_recog. In this case, STMT may be
1421 of this form:
1422 X = pattern_expr (arg0, arg1, ..., X)
1423 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1424 sequence that had been detected and replaced by the pattern-stmt (STMT).
1425
1426 In some cases of reduction patterns, the type of the reduction variable X is
1427 different than the type of the other arguments of STMT.
1428 In such cases, the vectype that is used when transforming STMT into a vector
1429 stmt is different than the vectype that is used to determine the
1430 vectorization factor, because it consists of a different number of elements
1431 than the actual number of elements that are being operated upon in parallel.
1432
1433 For example, consider an accumulation of shorts into an int accumulator.
1434 On some targets it's possible to vectorize this pattern operating on 8
1435 shorts at a time (hence, the vectype for purposes of determining the
1436 vectorization factor should be V8HI); on the other hand, the vectype that
1437 is used to create the vector form is actually V4SI (the type of the result).
1438
1439 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1440 indicates what is the actual level of parallelism (V8HI in the example), so
1441 that the right vectorization factor would be derived. This vectype
1442 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1443 be used to create the vectorized stmt. The right vectype for the vectorized
1444 stmt is obtained from the type of the result X:
1445 get_vectype_for_scalar_type (TREE_TYPE (X))
1446
1447 This means that, contrary to "regular" reductions (or "regular" stmts in
1448 general), the following equation:
1449 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1450 does *NOT* necessarily hold for reduction patterns. */
1451
1452 bool
1453 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1454 {
1455 tree vec_dest;
1456 tree scalar_dest;
1457 tree op;
1458 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1459 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1460 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1461 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1463 tree operation;
1464 enum tree_code code, orig_code, epilog_reduc_code = 0;
1465 enum machine_mode vec_mode;
1466 int op_type;
1467 optab optab, reduc_optab;
1468 tree new_temp = NULL_TREE;
1469 tree def, def_stmt;
1470 enum vect_def_type dt;
1471 tree new_phi;
1472 tree scalar_type;
1473 bool is_simple_use;
1474 tree orig_stmt;
1475 stmt_vec_info orig_stmt_info;
1476 tree expr = NULL_TREE;
1477 int i;
1478 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1479 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1480 stmt_vec_info prev_stmt_info;
1481 tree reduc_def;
1482 tree new_stmt = NULL_TREE;
1483 int j;
1484
1485 gcc_assert (ncopies >= 1);
1486
1487 /* 1. Is vectorizable reduction? */
1488
1489 /* Not supportable if the reduction variable is used in the loop. */
1490 if (STMT_VINFO_RELEVANT_P (stmt_info))
1491 return false;
1492
1493 if (!STMT_VINFO_LIVE_P (stmt_info))
1494 return false;
1495
1496 /* Make sure it was already recognized as a reduction computation. */
1497 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1498 return false;
1499
1500 /* 2. Has this been recognized as a reduction pattern?
1501
1502 Check if STMT represents a pattern that has been recognized
1503 in earlier analysis stages. For stmts that represent a pattern,
1504 the STMT_VINFO_RELATED_STMT field records the last stmt in
1505 the original sequence that constitutes the pattern. */
1506
1507 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1508 if (orig_stmt)
1509 {
1510 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1511 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1512 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1513 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1514 }
1515
1516 /* 3. Check the operands of the operation. The first operands are defined
1517 inside the loop body. The last operand is the reduction variable,
1518 which is defined by the loop-header-phi. */
1519
1520 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1521
1522 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1523 code = TREE_CODE (operation);
1524 op_type = TREE_OPERAND_LENGTH (operation);
1525 if (op_type != binary_op && op_type != ternary_op)
1526 return false;
1527 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1528 scalar_type = TREE_TYPE (scalar_dest);
1529
1530 /* All uses but the last are expected to be defined in the loop.
1531 The last use is the reduction variable. */
1532 for (i = 0; i < op_type-1; i++)
1533 {
1534 op = TREE_OPERAND (operation, i);
1535 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1536 gcc_assert (is_simple_use);
1537 if (dt != vect_loop_def
1538 && dt != vect_invariant_def
1539 && dt != vect_constant_def
1540 && dt != vect_induction_def)
1541 return false;
1542 }
1543
1544 op = TREE_OPERAND (operation, i);
1545 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1546 gcc_assert (is_simple_use);
1547 gcc_assert (dt == vect_reduction_def);
1548 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1549 if (orig_stmt)
1550 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1551 else
1552 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1553
1554 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1555 return false;
1556
1557 /* 4. Supportable by target? */
1558
1559 /* 4.1. check support for the operation in the loop */
1560 optab = optab_for_tree_code (code, vectype);
1561 if (!optab)
1562 {
1563 if (vect_print_dump_info (REPORT_DETAILS))
1564 fprintf (vect_dump, "no optab.");
1565 return false;
1566 }
1567 vec_mode = TYPE_MODE (vectype);
1568 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1569 {
1570 if (vect_print_dump_info (REPORT_DETAILS))
1571 fprintf (vect_dump, "op not supported by target.");
1572 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1573 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1574 < vect_min_worthwhile_factor (code))
1575 return false;
1576 if (vect_print_dump_info (REPORT_DETAILS))
1577 fprintf (vect_dump, "proceeding using word mode.");
1578 }
1579
1580 /* Worthwhile without SIMD support? */
1581 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1582 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1583 < vect_min_worthwhile_factor (code))
1584 {
1585 if (vect_print_dump_info (REPORT_DETAILS))
1586 fprintf (vect_dump, "not worthwhile without SIMD support.");
1587 return false;
1588 }
1589
1590 /* 4.2. Check support for the epilog operation.
1591
1592 If STMT represents a reduction pattern, then the type of the
1593 reduction variable may be different than the type of the rest
1594 of the arguments. For example, consider the case of accumulation
1595 of shorts into an int accumulator. The original code:
1596 S1: int_a = (int) short_a;
1597 orig_stmt-> S2: int_acc = plus <int_a, int_acc>;
1598
1599 was replaced with:
1600 STMT: int_acc = widen_sum <short_a, int_acc>
1601
1602 This means that:
1603 1. The tree-code that is used to create the vector operation in the
1604 epilog code (that reduces the partial results) is not the
1605 tree-code of STMT, but is rather the tree-code of the original
1606 stmt from the pattern that STMT is replacing. I.e., in the example
1607 above we want to use 'widen_sum' in the loop, but 'plus' in the
1608 epilog.
1609 2. The type (mode) we use to check available target support
1610 for the vector operation to be created in the *epilog*, is
1611 determined by the type of the reduction variable (in the example
1612 above we'd check this: plus_optab[vect_int_mode]).
1613 However the type (mode) we use to check available target support
1614 for the vector operation to be created *inside the loop*, is
1615 determined by the type of the other arguments to STMT (in the
1616 example we'd check this: widen_sum_optab[vect_short_mode]).
1617
1618 This is contrary to "regular" reductions, in which the types of all
1619 the arguments are the same as the type of the reduction variable.
1620 For "regular" reductions we can therefore use the same vector type
1621 (and also the same tree-code) when generating the epilog code and
1622 when generating the code inside the loop. */
1623
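/* For illustration only (hypothetical modes, not tied to a particular
   target): for the widen_sum example above on a 16-byte vector ISA,
   the two support checks would query roughly:

      inside the loop:  the widen_sum optab with V8HImode (type of short_a)
      in the epilog:    the reduction optab with V4SImode (type of int_acc)

   so a target may well support one of the two operations but not the
   other.  */
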
1624 if (orig_stmt)
1625 {
1626 /* This is a reduction pattern: get the vectype from the type of the
1627 reduction variable, and get the tree-code from orig_stmt. */
1628 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1629 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1630 vec_mode = TYPE_MODE (vectype);
1631 }
1632 else
1633 {
1634 /* Regular reduction: the same vectype and tree-code used for the
1635 vector code inside the loop can also be used for the epilog code. */
1636 orig_code = code;
1637 }
1638
1639 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1640 return false;
1641 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1642 if (!reduc_optab)
1643 {
1644 if (vect_print_dump_info (REPORT_DETAILS))
1645 fprintf (vect_dump, "no optab for reduction.");
1646 epilog_reduc_code = NUM_TREE_CODES;
1647 }
1648 if (reduc_optab && reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1649 {
1650 if (vect_print_dump_info (REPORT_DETAILS))
1651 fprintf (vect_dump, "reduc op not supported by target.");
1652 epilog_reduc_code = NUM_TREE_CODES;
1653 }
1654
1655 if (!vec_stmt) /* transformation not required. */
1656 {
1657 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1658 return true;
1659 }
1660
1661 /** Transform. **/
1662
1663 if (vect_print_dump_info (REPORT_DETAILS))
1664 fprintf (vect_dump, "transform reduction.");
1665
1666 /* Create the destination vector */
1667 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1668
1669 /* Create the reduction-phi that defines the reduction-operand. */
1670 new_phi = create_phi_node (vec_dest, loop->header);
1671
1672 /* In case the vectorization factor (VF) is bigger than the number
1673 of elements that we can fit in a vectype (nunits), we have to generate
1674 more than one vector stmt - i.e - we need to "unroll" the
1675 vector stmt by a factor VF/nunits. For more details see documentation
1676 in vectorizable_operation. */
1677
1678 prev_stmt_info = NULL;
1679 for (j = 0; j < ncopies; j++)
1680 {
1681 /* Handle uses. */
1682 if (j == 0)
1683 {
1684 op = TREE_OPERAND (operation, 0);
1685 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1686 if (op_type == ternary_op)
1687 {
1688 op = TREE_OPERAND (operation, 1);
1689 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1690 }
1691
1692 /* Get the vector def for the reduction variable from the phi node */
1693 reduc_def = PHI_RESULT (new_phi);
1694 }
1695 else
1696 {
1697 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
1698 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
1699 if (op_type == ternary_op)
1700 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
1701
1702 /* Get the vector def for the reduction variable from the vectorized
1703 reduction operation generated in the previous iteration (j-1) */
1704 reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
1705 }
1706
1707 /* Arguments are ready. Create the new vector stmt. */
1708 if (op_type == binary_op)
1709 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
1710 else
1711 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1712 reduc_def);
1713 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
1714 new_temp = make_ssa_name (vec_dest, new_stmt);
1715 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
1716 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1717
1718 if (j == 0)
1719 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1720 else
1721 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1722 prev_stmt_info = vinfo_for_stmt (new_stmt);
1723 }
1724
1725 /* Finalize the reduction-phi (set its arguments) and create the
1726 epilog reduction code. */
1727 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1728 return true;
1729 }
1730
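/* A schematic sketch (illustrative names, not actual dump output) of what
   the transformation above produces for a simple sum reduction with
   ncopies == 1:

     loop header:
       vsum_0 = PHI <vinit (preheader), vsum_1 (latch)>   <-- new_phi
     loop body:
       vsum_1 = vx + vsum_0                               <-- new_stmt
     epilog:
       sum = <reduce vsum_1 to a scalar>   <-- created by
                                               vect_create_epilog_for_reduction

   The phi arguments themselves are only filled in by
   vect_create_epilog_for_reduction.  */
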
1731 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1732 a function declaration if the target has a vectorized version
1733 of the function, or NULL_TREE if the function cannot be vectorized. */
1734
1735 tree
1736 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
1737 {
1738 tree fndecl = get_callee_fndecl (call);
1739 enum built_in_function code;
1740
1741 /* We only handle functions that do not read or clobber memory -- i.e.
1742 const or novops ones. */
1743 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1744 return NULL_TREE;
1745
1746 if (!fndecl
1747 || TREE_CODE (fndecl) != FUNCTION_DECL
1748 || !DECL_BUILT_IN (fndecl))
1749 return NULL_TREE;
1750
1751 code = DECL_FUNCTION_CODE (fndecl);
1752 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1753 vectype_in);
1754 }
1755
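/* For example (schematic, target dependent): for a call to sqrt in a
   loop, CODE is BUILT_IN_SQRT and the hook may return the decl of a
   V2DFmode square-root builtin if the target provides one; a target
   without such a builtin returns NULL_TREE and the call is not
   vectorized.  */
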
1756 /* Function vectorizable_call.
1757
1758 Check if STMT performs a function call that can be vectorized.
1759 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1760 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1761 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1762
1763 bool
1764 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1765 {
1766 tree vec_dest;
1767 tree scalar_dest;
1768 tree operation;
1769 tree op, type;
1770 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1771 tree vectype_out, vectype_in;
1772 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1773 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
1774 enum vect_def_type dt[2];
1775 int ncopies, j, nargs;
1776 call_expr_arg_iterator iter;
1777
1778 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1779 return false;
1780
1781 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1782 return false;
1783
1784 /* FORNOW: not yet supported. */
1785 if (STMT_VINFO_LIVE_P (stmt_info))
1786 {
1787 if (vect_print_dump_info (REPORT_DETAILS))
1788 fprintf (vect_dump, "value used after loop.");
1789 return false;
1790 }
1791
1792 /* Is STMT a vectorizable call? */
1793 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1794 return false;
1795
1796 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
1797 return false;
1798
1799 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1800 if (TREE_CODE (operation) != CALL_EXPR)
1801 return false;
1802
1803 /* Process function arguments. */
1804 rhs_type = NULL_TREE;
1805 nargs = 0;
1806 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
1807 {
1808 ++nargs;
1809
1810 /* Bail out if the function has more than two arguments; we
1811 do not have interesting builtin functions to vectorize with
1812 more than two arguments. */
1813 if (nargs > 2)
1814 return false;
1815
1816 /* We can only handle calls with arguments of the same type. */
1817 if (rhs_type
1818 && rhs_type != TREE_TYPE (op))
1819 {
1820 if (vect_print_dump_info (REPORT_DETAILS))
1821 fprintf (vect_dump, "argument types differ.");
1822 return false;
1823 }
1824 rhs_type = TREE_TYPE (op);
1825
1826 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs-1]))
1827 {
1828 if (vect_print_dump_info (REPORT_DETAILS))
1829 fprintf (vect_dump, "use not simple.");
1830 return false;
1831 }
1832 }
1833
1834 /* A call with no arguments is also not handled. */
1835 if (nargs == 0)
1836 return false;
1837
1838 vectype_in = get_vectype_for_scalar_type (rhs_type);
1839
1840 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
1841 vectype_out = get_vectype_for_scalar_type (lhs_type);
1842
1843 /* Only handle the case of vectors with the same number of elements.
1844 FIXME: We need a way to handle for example the SSE2 cvtpd2dq
1845 instruction which converts V2DFmode to V4SImode but only
1846 using the lower half of the V4SImode result. */
1847 if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out))
1848 return false;
1849
1850 /* For now, we only vectorize functions if a target specific builtin
1851 is available. TODO -- in some cases, it might be profitable to
1852 insert the calls for pieces of the vector, in order to be able
1853 to vectorize other operations in the loop. */
1854 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
1855 if (fndecl == NULL_TREE)
1856 {
1857 if (vect_print_dump_info (REPORT_DETAILS))
1858 fprintf (vect_dump, "function is not vectorizable.");
1859
1860 return false;
1861 }
1862
1863 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
1864
1865 if (!vec_stmt) /* transformation not required. */
1866 {
1867 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1868 return true;
1869 }
1870
1871 /** Transform. **/
1872
1873 if (vect_print_dump_info (REPORT_DETAILS))
1874 fprintf (vect_dump, "transform operation.");
1875
1876 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1877 / TYPE_VECTOR_SUBPARTS (vectype_out));
1878 gcc_assert (ncopies >= 1);
1879
1880 /* Handle def. */
1881 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1882 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1883
1884 prev_stmt_info = NULL;
1885 for (j = 0; j < ncopies; ++j)
1886 {
1887 tree new_stmt, vargs;
1888 tree vec_oprnd[2];
1889 int n;
1890
1891 /* Build argument list for the vectorized call. */
1892 /* FIXME: Rewrite this so that it doesn't construct a temporary
1893 list. */
1894 vargs = NULL_TREE;
1895 n = -1;
1896 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
1897 {
1898 ++n;
1899
1900 if (j == 0)
1901 vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL);
1902 else
1903 vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]);
1904
1905 vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs);
1906 }
1907 vargs = nreverse (vargs);
1908
1909 rhs = build_function_call_expr (fndecl, vargs);
1910 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
1911 new_temp = make_ssa_name (vec_dest, new_stmt);
1912 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
1913
1914 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1915
1916 if (j == 0)
1917 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1918 else
1919 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1920 prev_stmt_info = vinfo_for_stmt (new_stmt);
1921 }
1922
1923 /* The call in STMT might prevent it from being removed in dce. We however
1924 cannot remove it here, due to the way the ssa name it defines is mapped
1925 to the new definition. So just replace the rhs of the statement with something
1926 harmless. */
1927 type = TREE_TYPE (scalar_dest);
1928 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
1929
1930 return true;
1931 }
1932
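/* An illustrative example (the builtin name below is hypothetical): with
   VF == nunits == 4, a loop containing

       x[i] = sinf (y[i]);

   becomes, provided the target advertises a vectorized sinf,

       vx = __builtin_target_sinf_v4sf (vy);

   and, as described above, the rhs of the scalar call is afterwards
   replaced by the harmless constant 0.0f so that dce can remove it.  */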
1933
1934 /* Function vect_gen_widened_results_half
1935
1936 Create a vector stmt whose code, type, number of arguments, and result
1937 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
1938 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1939 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1940 needs to be created (DECL is a function-decl of a target-builtin).
1941 STMT is the original scalar stmt that we are vectorizing. */
1942
1943 static tree
1944 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
1945 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1946 tree vec_dest, block_stmt_iterator *bsi,
1947 tree stmt)
1948 {
1949 tree expr;
1950 tree new_stmt;
1951 tree new_temp;
1952 tree sym;
1953 ssa_op_iter iter;
1954
1955 /* Generate half of the widened result: */
1956 if (code == CALL_EXPR)
1957 {
1958 /* Target specific support */
1959 if (op_type == binary_op)
1960 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
1961 else
1962 expr = build_call_expr (decl, 1, vec_oprnd0);
1963 }
1964 else
1965 {
1966 /* Generic support */
1967 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1968 if (op_type == binary_op)
1969 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
1970 else
1971 expr = build1 (code, vectype, vec_oprnd0);
1972 }
1973 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
1974 new_temp = make_ssa_name (vec_dest, new_stmt);
1975 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
1976 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1977
1978 if (code == CALL_EXPR)
1979 {
1980 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
1981 {
1982 if (TREE_CODE (sym) == SSA_NAME)
1983 sym = SSA_NAME_VAR (sym);
1984 mark_sym_for_renaming (sym);
1985 }
1986 }
1987
1988 return new_stmt;
1989 }
1990
1991
1992 /* Function vectorizable_conversion.
1993
1994 Check if STMT performs a conversion operation, that can be vectorized.
1995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1996 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1998
1999 bool
2000 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2001 tree * vec_stmt)
2002 {
2003 tree vec_dest;
2004 tree scalar_dest;
2005 tree operation;
2006 tree op0;
2007 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2009 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2010 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2011 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2012 tree new_temp;
2013 tree def, def_stmt;
2014 enum vect_def_type dt0;
2015 tree new_stmt;
2016 stmt_vec_info prev_stmt_info;
2017 int nunits_in;
2018 int nunits_out;
2019 tree vectype_out, vectype_in;
2020 int ncopies, j;
2021 tree expr;
2022 tree rhs_type, lhs_type;
2023 tree builtin_decl;
2024 enum { NARROW, NONE, WIDEN } modifier;
2025
2026 /* Is STMT a vectorizable conversion? */
2027
2028 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2029 return false;
2030
2031 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2032 return false;
2033
2034 if (STMT_VINFO_LIVE_P (stmt_info))
2035 {
2036 /* FORNOW: not yet supported. */
2037 if (vect_print_dump_info (REPORT_DETAILS))
2038 fprintf (vect_dump, "value used after loop.");
2039 return false;
2040 }
2041
2042 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2043 return false;
2044
2045 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2046 return false;
2047
2048 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2049 code = TREE_CODE (operation);
2050 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2051 return false;
2052
2053 /* Check types of lhs and rhs */
2054 op0 = TREE_OPERAND (operation, 0);
2055 rhs_type = TREE_TYPE (op0);
2056 vectype_in = get_vectype_for_scalar_type (rhs_type);
2057 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2058
2059 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2060 lhs_type = TREE_TYPE (scalar_dest);
2061 vectype_out = get_vectype_for_scalar_type (lhs_type);
2062 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2063
2064 /* FORNOW */
2065 if (nunits_in == nunits_out / 2)
2066 modifier = NARROW;
2067 else if (nunits_out == nunits_in)
2068 modifier = NONE;
2069 else if (nunits_out == nunits_in / 2)
2070 modifier = WIDEN;
2071 else
2072 return false;
2073
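/* E.g. (illustrative, assuming 16-byte vectors): int -> float uses
   V4SI -> V4SF, so nunits_out == nunits_in and MODIFIER is NONE;
   int -> double uses V4SI -> V2DF (WIDEN); double -> int uses
   V2DF -> V4SI (NARROW).  */
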
2074 if (modifier == NONE)
2075 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2076
2077 /* Bail out if the types are both integral or both non-integral. */
2078 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2079 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2080 return false;
2081
2082 if (modifier == NARROW)
2083 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2084 else
2085 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2086
2087 /* Sanity check: make sure that at least one copy of the vectorized stmt
2088 needs to be generated. */
2089 gcc_assert (ncopies >= 1);
2090
2091 /* Check the operands of the operation. */
2092 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2093 {
2094 if (vect_print_dump_info (REPORT_DETAILS))
2095 fprintf (vect_dump, "use not simple.");
2096 return false;
2097 }
2098
2099 /* Supportable by target? */
2100 if ((modifier == NONE
2101 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2102 || (modifier == WIDEN
2103 && !supportable_widening_operation (code, stmt, vectype_in,
2104 &decl1, &decl2,
2105 &code1, &code2))
2106 || (modifier == NARROW
2107 && !supportable_narrowing_operation (code, stmt, vectype_in,
2108 &code1)))
2109 {
2110 if (vect_print_dump_info (REPORT_DETAILS))
2111 fprintf (vect_dump, "op not supported by target.");
2112 return false;
2113 }
2114
2115 if (modifier != NONE)
2116 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2117
2118 if (!vec_stmt) /* transformation not required. */
2119 {
2120 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2121 return true;
2122 }
2123
2124 /** Transform. **/
2125 if (vect_print_dump_info (REPORT_DETAILS))
2126 fprintf (vect_dump, "transform conversion.");
2127
2128 /* Handle def. */
2129 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2130
2131 prev_stmt_info = NULL;
2132 switch (modifier)
2133 {
2134 case NONE:
2135 for (j = 0; j < ncopies; j++)
2136 {
2137 tree sym;
2138 ssa_op_iter iter;
2139
2140 if (j == 0)
2141 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2142 else
2143 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2144
2145 builtin_decl =
2146 targetm.vectorize.builtin_conversion (code, vectype_in);
2147 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2148
2149 /* Arguments are ready. Create the new vector stmt. */
2150 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2151 new_temp = make_ssa_name (vec_dest, new_stmt);
2152 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2153 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2154 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2155 {
2156 if (TREE_CODE (sym) == SSA_NAME)
2157 sym = SSA_NAME_VAR (sym);
2158 mark_sym_for_renaming (sym);
2159 }
2160
2161 if (j == 0)
2162 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2163 else
2164 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2165 prev_stmt_info = vinfo_for_stmt (new_stmt);
2166 }
2167 break;
2168
2169 case WIDEN:
2170 /* In case the vectorization factor (VF) is bigger than the number
2171 of elements that we can fit in a vectype (nunits), we have to
2172 generate more than one vector stmt - i.e - we need to "unroll"
2173 the vector stmt by a factor VF/nunits. */
2174 for (j = 0; j < ncopies; j++)
2175 {
2176 if (j == 0)
2177 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2178 else
2179 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2180
2181 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2182
2183 /* Generate first half of the widened result: */
2184 new_stmt
2185 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2186 vec_oprnd0, vec_oprnd1,
2187 unary_op, vec_dest, bsi, stmt);
2188 if (j == 0)
2189 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2190 else
2191 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2192 prev_stmt_info = vinfo_for_stmt (new_stmt);
2193
2194 /* Generate second half of the widened result: */
2195 new_stmt
2196 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2197 vec_oprnd0, vec_oprnd1,
2198 unary_op, vec_dest, bsi, stmt);
2199 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2200 prev_stmt_info = vinfo_for_stmt (new_stmt);
2201 }
2202 break;
2203
2204 case NARROW:
2205 /* In case the vectorization factor (VF) is bigger than the number
2206 of elements that we can fit in a vectype (nunits), we have to
2207 generate more than one vector stmt - i.e - we need to "unroll"
2208 the vector stmt by a factor VF/nunits. */
2209 for (j = 0; j < ncopies; j++)
2210 {
2211 /* Handle uses. */
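/* Each narrowed vector result consumes two vector defs of the
   input, so the def chain is advanced twice per copy:
   VEC_OPRND1 always holds the most recent def taken, and the
   next copy continues the chain from it.  */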
2212 if (j == 0)
2213 {
2214 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2215 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2216 }
2217 else
2218 {
2219 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2220 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2221 }
2222
2223 /* Arguments are ready. Create the new vector stmt. */
2224 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2225 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2226 new_temp = make_ssa_name (vec_dest, new_stmt);
2227 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2228 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2229
2230 if (j == 0)
2231 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2232 else
2233 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2234
2235 prev_stmt_info = vinfo_for_stmt (new_stmt);
2236 }
2237
2238 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2239 }
2240 return true;
2241 }
2242
2243
2244 /* Function vectorizable_assignment.
2245
2246 Check if STMT performs an assignment (copy) that can be vectorized.
2247 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2248 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2249 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2250
2251 bool
2252 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2253 {
2254 tree vec_dest;
2255 tree scalar_dest;
2256 tree op;
2257 tree vec_oprnd;
2258 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2259 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2261 tree new_temp;
2262 tree def, def_stmt;
2263 enum vect_def_type dt;
2264 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2265 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2266
2267 gcc_assert (ncopies >= 1);
2268 if (ncopies > 1)
2269 return false; /* FORNOW */
2270
2271 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2272 return false;
2273
2274 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2275 return false;
2276
2277 /* FORNOW: not yet supported. */
2278 if (STMT_VINFO_LIVE_P (stmt_info))
2279 {
2280 if (vect_print_dump_info (REPORT_DETAILS))
2281 fprintf (vect_dump, "value used after loop.");
2282 return false;
2283 }
2284
2285 /* Is vectorizable assignment? */
2286 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2287 return false;
2288
2289 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2290 if (TREE_CODE (scalar_dest) != SSA_NAME)
2291 return false;
2292
2293 op = GIMPLE_STMT_OPERAND (stmt, 1);
2294 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2295 {
2296 if (vect_print_dump_info (REPORT_DETAILS))
2297 fprintf (vect_dump, "use not simple.");
2298 return false;
2299 }
2300
2301 if (!vec_stmt) /* transformation not required. */
2302 {
2303 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2304 return true;
2305 }
2306
2307 /** Transform. **/
2308 if (vect_print_dump_info (REPORT_DETAILS))
2309 fprintf (vect_dump, "transform assignment.");
2310
2311 /* Handle def. */
2312 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2313
2314 /* Handle use. */
2315 op = GIMPLE_STMT_OPERAND (stmt, 1);
2316 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2317
2318 /* Arguments are ready. Create the new vector stmt. */
2319 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2320 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2321 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2322 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2323
2324 return true;
2325 }
2326
2327
2328 /* Function vect_min_worthwhile_factor.
2329
2330 For a loop where we could vectorize the operation indicated by CODE,
2331 return the minimum vectorization factor that makes it worthwhile
2332 to use generic vectors. */
2333 static int
2334 vect_min_worthwhile_factor (enum tree_code code)
2335 {
2336 switch (code)
2337 {
2338 case PLUS_EXPR:
2339 case MINUS_EXPR:
2340 case NEGATE_EXPR:
2341 return 4;
2342
2343 case BIT_AND_EXPR:
2344 case BIT_IOR_EXPR:
2345 case BIT_XOR_EXPR:
2346 case BIT_NOT_EXPR:
2347 return 2;
2348
2349 default:
2350 return INT_MAX;
2351 }
2352 }
2353
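/* A plausible rationale for the thresholds above: emulating addition or
   subtraction on a word-mode "vector" needs extra fixups to suppress
   carries between elements, so it is only considered worthwhile from
   VF == 4 on, whereas the bitwise operations work on a full word
   unchanged and already pay off at VF == 2.  */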
2354
2355 /* Function vectorizable_induction
2356
2357 Check if PHI performs an induction computation that can be vectorized.
2358 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
2359 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
2360 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2361
2362 bool
2363 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2364 tree *vec_stmt)
2365 {
2366 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
2367 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2368 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2369 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2370 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2371 tree vec_def;
2372
2373 gcc_assert (ncopies >= 1);
2374
2375 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2376 return false;
2377
2378 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
2379
2380 if (STMT_VINFO_LIVE_P (stmt_info))
2381 {
2382 /* FORNOW: not yet supported. */
2383 if (vect_print_dump_info (REPORT_DETAILS))
2384 fprintf (vect_dump, "value used after loop.");
2385 return false;
2386 }
2387
2388 if (TREE_CODE (phi) != PHI_NODE)
2389 return false;
2390
2391 if (!vec_stmt) /* transformation not required. */
2392 {
2393 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
2394 return true;
2395 }
2396
2397 /** Transform. **/
2398
2399 if (vect_print_dump_info (REPORT_DETAILS))
2400 fprintf (vect_dump, "transform induction phi.");
2401
2402 vec_def = get_initial_def_for_induction (phi);
2403 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
2404 return true;
2405 }
2406
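/* A schematic example of the result: for the IV  i_1 = PHI <0, i_2>  with
   i_2 = i_1 + 1, V4SImode and VF == 4, get_initial_def_for_induction
   builds, in essence,

       vind_1 = PHI <{0,1,2,3}, vind_2>
       vind_2 = vind_1 + {4,4,4,4};

   and the vectorized phi is what ends up in *VEC_STMT.  */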
2407
2408 /* Function vectorizable_operation.
2409
2410 Check if STMT performs a binary or unary operation that can be vectorized.
2411 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2412 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2413 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2414
2415 bool
2416 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2417 {
2418 tree vec_dest;
2419 tree scalar_dest;
2420 tree operation;
2421 tree op0, op1 = NULL;
2422 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2424 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2426 enum tree_code code;
2427 enum machine_mode vec_mode;
2428 tree new_temp;
2429 int op_type;
2430 optab optab;
2431 int icode;
2432 enum machine_mode optab_op2_mode;
2433 tree def, def_stmt;
2434 enum vect_def_type dt0, dt1;
2435 tree new_stmt;
2436 stmt_vec_info prev_stmt_info;
2437 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2438 int nunits_out;
2439 tree vectype_out;
2440 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2441 int j;
2442
2443 gcc_assert (ncopies >= 1);
2444
2445 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2446 return false;
2447
2448 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2449 return false;
2450
2451 /* FORNOW: not yet supported. */
2452 if (STMT_VINFO_LIVE_P (stmt_info))
2453 {
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "value used after loop.");
2456 return false;
2457 }
2458
2459 /* Is STMT a vectorizable binary/unary operation? */
2460 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2461 return false;
2462
2463 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2464 return false;
2465
2466 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2467 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2468 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2469 if (nunits_out != nunits_in)
2470 return false;
2471
2472 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2473 code = TREE_CODE (operation);
2474 optab = optab_for_tree_code (code, vectype);
2475
2476 /* Support only unary or binary operations. */
2477 op_type = TREE_OPERAND_LENGTH (operation);
2478 if (op_type != unary_op && op_type != binary_op)
2479 {
2480 if (vect_print_dump_info (REPORT_DETAILS))
2481 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2482 return false;
2483 }
2484
2485 op0 = TREE_OPERAND (operation, 0);
2486 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2487 {
2488 if (vect_print_dump_info (REPORT_DETAILS))
2489 fprintf (vect_dump, "use not simple.");
2490 return false;
2491 }
2492
2493 if (op_type == binary_op)
2494 {
2495 op1 = TREE_OPERAND (operation, 1);
2496 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2497 {
2498 if (vect_print_dump_info (REPORT_DETAILS))
2499 fprintf (vect_dump, "use not simple.");
2500 return false;
2501 }
2502 }
2503
2504 /* Supportable by target? */
2505 if (!optab)
2506 {
2507 if (vect_print_dump_info (REPORT_DETAILS))
2508 fprintf (vect_dump, "no optab.");
2509 return false;
2510 }
2511 vec_mode = TYPE_MODE (vectype);
2512 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2513 if (icode == CODE_FOR_nothing)
2514 {
2515 if (vect_print_dump_info (REPORT_DETAILS))
2516 fprintf (vect_dump, "op not supported by target.");
2517 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2518 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2519 < vect_min_worthwhile_factor (code))
2520 return false;
2521 if (vect_print_dump_info (REPORT_DETAILS))
2522 fprintf (vect_dump, "proceeding using word mode.");
2523 }
2524
2525 /* Worthwhile without SIMD support? */
2526 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2527 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2528 < vect_min_worthwhile_factor (code))
2529 {
2530 if (vect_print_dump_info (REPORT_DETAILS))
2531 fprintf (vect_dump, "not worthwhile without SIMD support.");
2532 return false;
2533 }
2534
2535 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
2536 {
2537 /* FORNOW: not yet supported. */
2538 if (!VECTOR_MODE_P (vec_mode))
2539 return false;
2540
2541 /* Invariant argument is needed for a vector shift
2542 by a scalar shift operand. */
2543 optab_op2_mode = insn_data[icode].operand[2].mode;
2544 if (! (VECTOR_MODE_P (optab_op2_mode)
2545 || dt1 == vect_constant_def
2546 || dt1 == vect_invariant_def))
2547 {
2548 if (vect_print_dump_info (REPORT_DETAILS))
2549 fprintf (vect_dump, "operand mode requires invariant argument.");
2550 return false;
2551 }
2552 }
2553
2554 if (!vec_stmt) /* transformation not required. */
2555 {
2556 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2557 return true;
2558 }
2559
2560 /** Transform. **/
2561
2562 if (vect_print_dump_info (REPORT_DETAILS))
2563 fprintf (vect_dump, "transform binary/unary operation.");
2564
2565 /* Handle def. */
2566 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2567
2568 /* In case the vectorization factor (VF) is bigger than the number
2569 of elements that we can fit in a vectype (nunits), we have to generate
2570 more than one vector stmt - i.e - we need to "unroll" the
2571 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2572 from one copy of the vector stmt to the next, in the field
2573 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2574 stages to find the correct vector defs to be used when vectorizing
2575 stmts that use the defs of the current stmt. The example below illustrates
2576 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2577 4 vectorized stmts):
2578
2579 before vectorization:
2580 RELATED_STMT VEC_STMT
2581 S1: x = memref - -
2582 S2: z = x + 1 - -
2583
2584 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2585 there):
2586 RELATED_STMT VEC_STMT
2587 VS1_0: vx0 = memref0 VS1_1 -
2588 VS1_1: vx1 = memref1 VS1_2 -
2589 VS1_2: vx2 = memref2 VS1_3 -
2590 VS1_3: vx3 = memref3 - -
2591 S1: x = load - VS1_0
2592 S2: z = x + 1 - -
2593
2594 step2: vectorize stmt S2 (done here):
2595 To vectorize stmt S2 we first need to find the relevant vector
2596 def for the first operand 'x'. This is, as usual, obtained from
2597 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2598 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2599 relevant vector def 'vx0'. Having found 'vx0' we can generate
2600 the vector stmt VS2_0, and as usual, record it in the
2601 STMT_VINFO_VEC_STMT of stmt S2.
2602 When creating the second copy (VS2_1), we obtain the relevant vector
2603 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2604 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2605 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2606 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2607 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2608 chain of stmts and pointers:
2609 RELATED_STMT VEC_STMT
2610 VS1_0: vx0 = memref0 VS1_1 -
2611 VS1_1: vx1 = memref1 VS1_2 -
2612 VS1_2: vx2 = memref2 VS1_3 -
2613 VS1_3: vx3 = memref3 - -
2614 S1: x = load - VS1_0
2615 VS2_0: vz0 = vx0 + v1 VS2_1 -
2616 VS2_1: vz1 = vx1 + v1 VS2_2 -
2617 VS2_2: vz2 = vx2 + v1 VS2_3 -
2618 VS2_3: vz3 = vx3 + v1 - -
2619 S2: z = x + 1 - VS2_0 */
2620
2621 prev_stmt_info = NULL;
2622 for (j = 0; j < ncopies; j++)
2623 {
2624 /* Handle uses. */
2625 if (j == 0)
2626 {
2627 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2628 if (op_type == binary_op)
2629 {
2630 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
2631 {
2632 /* Vector shl and shr insn patterns can be defined with
2633 scalar operand 2 (shift operand). In this case, use
2634 constant or loop invariant op1 directly, without
2635 extending it to vector mode first. */
2636 optab_op2_mode = insn_data[icode].operand[2].mode;
2637 if (!VECTOR_MODE_P (optab_op2_mode))
2638 {
2639 if (vect_print_dump_info (REPORT_DETAILS))
2640 fprintf (vect_dump, "operand 1 using scalar mode.");
2641 vec_oprnd1 = op1;
2642 }
2643 }
2644 if (!vec_oprnd1)
2645 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2646 }
2647 }
2648 else
2649 {
2650 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2651 if (op_type == binary_op)
2652 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2653 }
2654
2655 /* Arguments are ready. Create the new vector stmt. */
2656
2657 if (op_type == binary_op)
2658 new_stmt = build_gimple_modify_stmt (vec_dest,
2659 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2660 else
2661 new_stmt = build_gimple_modify_stmt (vec_dest,
2662 build1 (code, vectype, vec_oprnd0));
2663 new_temp = make_ssa_name (vec_dest, new_stmt);
2664 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2665 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2666
2667 if (j == 0)
2668 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2669 else
2670 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2671 prev_stmt_info = vinfo_for_stmt (new_stmt);
2672 }
2673
2674 return true;
2675 }
2676
2677
2678 /* Function vectorizable_type_demotion
2679
2680 Check if STMT performs a binary or unary operation that involves
2681 type demotion, and if it can be vectorized.
2682 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2683 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2684 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2685
2686 bool
2687 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
2688 tree *vec_stmt)
2689 {
2690 tree vec_dest;
2691 tree scalar_dest;
2692 tree operation;
2693 tree op0;
2694 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2695 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2696 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2697 enum tree_code code, code1 = ERROR_MARK;
2698 tree new_temp;
2699 tree def, def_stmt;
2700 enum vect_def_type dt0;
2701 tree new_stmt;
2702 stmt_vec_info prev_stmt_info;
2703 int nunits_in;
2704 int nunits_out;
2705 tree vectype_out;
2706 int ncopies;
2707 int j;
2708 tree expr;
2709 tree vectype_in;
2710 tree scalar_type;
2711
2712 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2713 return false;
2714
2715 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2716 return false;
2717
2718 /* FORNOW: not yet supported. */
2719 if (STMT_VINFO_LIVE_P (stmt_info))
2720 {
2721 if (vect_print_dump_info (REPORT_DETAILS))
2722 fprintf (vect_dump, "value used after loop.");
2723 return false;
2724 }
2725
2726 /* Is STMT a vectorizable type-demotion operation? */
2727 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2728 return false;
2729
2730 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2731 return false;
2732
2733 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2734 code = TREE_CODE (operation);
2735 if (code != NOP_EXPR && code != CONVERT_EXPR)
2736 return false;
2737
2738 op0 = TREE_OPERAND (operation, 0);
2739 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2740 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2741
2742 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2743 scalar_type = TREE_TYPE (scalar_dest);
2744 vectype_out = get_vectype_for_scalar_type (scalar_type);
2745 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2746 if (nunits_in != nunits_out / 2) /* FORNOW */
2747 return false;
2748
2749 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2750 gcc_assert (ncopies >= 1);
2751
2752 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2753 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2754 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2755 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2756 && (code == NOP_EXPR || code == CONVERT_EXPR))))
2757 return false;
2758
2759 /* Check the operands of the operation. */
2760 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2761 {
2762 if (vect_print_dump_info (REPORT_DETAILS))
2763 fprintf (vect_dump, "use not simple.");
2764 return false;
2765 }
2766
2767 /* Supportable by target? */
2768 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
2769 return false;
2770
2771 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2772
2773 if (!vec_stmt) /* transformation not required. */
2774 {
2775 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2776 return true;
2777 }
2778
2779 /** Transform. **/
2780 if (vect_print_dump_info (REPORT_DETAILS))
2781 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2782 ncopies);
2783
2784 /* Handle def. */
2785 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2786
2787 /* In case the vectorization factor (VF) is bigger than the number
2788 of elements that we can fit in a vectype (nunits), we have to generate
2789 more than one vector stmt - i.e - we need to "unroll" the
2790 vector stmt by a factor VF/nunits. */
2791 prev_stmt_info = NULL;
2792 for (j = 0; j < ncopies; j++)
2793 {
2794 /* Handle uses. */
2795 if (j == 0)
2796 {
2797 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2798 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2799 }
2800 else
2801 {
2802 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2803 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2804 }
2805
2806 /* Arguments are ready. Create the new vector stmt. */
2807 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2808 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2809 new_temp = make_ssa_name (vec_dest, new_stmt);
2810 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2811 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2812
2813 if (j == 0)
2814 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2815 else
2816 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2817
2818 prev_stmt_info = vinfo_for_stmt (new_stmt);
2819 }
2820
2821 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2822 return true;
2823 }
2824
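/* A schematic example: demoting int to short with V4SI in, V8HI out and
   VF == 8 (so ncopies == 1), the two defs gathered per copy above become

       vshort = <CODE1> <vint0, vint1>;

   i.e. two V4SI defs are packed into a single V8HI result; the actual
   tree code is whatever supportable_narrowing_operation returned in
   CODE1.  */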
2825
2826 /* Function vectorizable_type_promotion
2827
2828 Check if STMT performs a binary or unary operation that involves
2829 type promotion, and if it can be vectorized.
2830 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2831 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2832 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2833
2834 bool
2835 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
2836 tree *vec_stmt)
2837 {
2838 tree vec_dest;
2839 tree scalar_dest;
2840 tree operation;
2841 tree op0, op1 = NULL;
2842 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2843 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2844 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2845 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2846 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2847 int op_type;
2848 tree def, def_stmt;
2849 enum vect_def_type dt0, dt1;
2850 tree new_stmt;
2851 stmt_vec_info prev_stmt_info;
2852 int nunits_in;
2853 int nunits_out;
2854 tree vectype_out;
2855 int ncopies;
2856 int j;
2857 tree vectype_in;
2858
2859 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2860 return false;
2861
2862 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2863 return false;
2864
2865 /* FORNOW: not yet supported. */
2866 if (STMT_VINFO_LIVE_P (stmt_info))
2867 {
2868 if (vect_print_dump_info (REPORT_DETAILS))
2869 fprintf (vect_dump, "value used after loop.");
2870 return false;
2871 }
2872
2873 /* Is STMT a vectorizable type-promotion operation? */
2874 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2875 return false;
2876
2877 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2878 return false;
2879
2880 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2881 code = TREE_CODE (operation);
2882 if (code != NOP_EXPR && code != CONVERT_EXPR
2883 && code != WIDEN_MULT_EXPR)
2884 return false;
2885
2886 op0 = TREE_OPERAND (operation, 0);
2887 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2888 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2889 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2890 gcc_assert (ncopies >= 1);
2891
2892 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2893 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2894 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2895 if (nunits_out != nunits_in / 2) /* FORNOW */
2896 return false;
2897
2898 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2899 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2900 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2901 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2902 && (code == CONVERT_EXPR || code == NOP_EXPR))))
2903 return false;
2904
2905 /* Check the operands of the operation. */
2906 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2907 {
2908 if (vect_print_dump_info (REPORT_DETAILS))
2909 fprintf (vect_dump, "use not simple.");
2910 return false;
2911 }
2912
2913 op_type = TREE_CODE_LENGTH (code);
2914 if (op_type == binary_op)
2915 {
2916 op1 = TREE_OPERAND (operation, 1);
2917 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2918 {
2919 if (vect_print_dump_info (REPORT_DETAILS))
2920 fprintf (vect_dump, "use not simple.");
2921 return false;
2922 }
2923 }
2924
2925 /* Supportable by target? */
2926 if (!supportable_widening_operation (code, stmt, vectype_in,
2927 &decl1, &decl2, &code1, &code2))
2928 return false;
2929
2930 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2931
2932 if (!vec_stmt) /* transformation not required. */
2933 {
2934 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2935 return true;
2936 }
2937
2938 /** Transform. **/
2939
2940 if (vect_print_dump_info (REPORT_DETAILS))
2941 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2942 ncopies);
2943
2944 /* Handle def. */
2945 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2946
2947 /* In case the vectorization factor (VF) is bigger than the number
2948 of elements that we can fit in a vectype (nunits), we have to generate
2949 more than one vector stmt - i.e - we need to "unroll" the
2950 vector stmt by a factor VF/nunits. */
2951
2952 prev_stmt_info = NULL;
2953 for (j = 0; j < ncopies; j++)
2954 {
2955 /* Handle uses. */
2956 if (j == 0)
2957 {
2958 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2959 if (op_type == binary_op)
2960 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2961 }
2962 else
2963 {
2964 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2965 if (op_type == binary_op)
2966 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2967 }
2968
2969 /* Arguments are ready. Create the new vector stmt. We are creating
2970 two vector defs because the widened result does not fit in one vector.
2971 The vectorized stmt can be expressed as a call to a target builtin,
2972 or by using a tree-code. */
2973 /* Generate first half of the widened result: */
2974 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
2975 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2976 if (j == 0)
2977 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2978 else
2979 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2980 prev_stmt_info = vinfo_for_stmt (new_stmt);
2981
2982 /* Generate second half of the widened result: */
2983 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
2984 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2985 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2986 prev_stmt_info = vinfo_for_stmt (new_stmt);
2987
2988 }
2989
2990 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2991 return true;
2992 }
2993
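/* A schematic example: promoting short to int with V8HI in, V4SI out and
   VF == 8 (so ncopies == 1), each V8HI input yields two V4SI results:

       vint_hi = <CODE1> <vshort>;    (first half, e.g. an unpack-high)
       vint_lo = <CODE2> <vshort>;    (second half, e.g. an unpack-low)

   chained through STMT_VINFO_RELATED_STMT exactly as in the loop
   above.  */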
2994
2995 /* Function vect_strided_store_supported.
2996
2997 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2998 and FALSE otherwise. */
2999
3000 static bool
3001 vect_strided_store_supported (tree vectype)
3002 {
3003 optab interleave_high_optab, interleave_low_optab;
3004 int mode;
3005
3006 mode = (int) TYPE_MODE (vectype);
3007
3008 /* Check that the operation is supported. */
3009 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3010 vectype);
3011 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3012 vectype);
3013 if (!interleave_high_optab || !interleave_low_optab)
3014 {
3015 if (vect_print_dump_info (REPORT_DETAILS))
3016 fprintf (vect_dump, "no optab for interleave.");
3017 return false;
3018 }
3019
3020 if (interleave_high_optab->handlers[(int) mode].insn_code
3021 == CODE_FOR_nothing
3022 || interleave_low_optab->handlers[(int) mode].insn_code
3023 == CODE_FOR_nothing)
3024 {
3025 if (vect_print_dump_info (REPORT_DETAILS))
3026 fprintf (vect_dump, "interleave op not supported by target.");
3027 return false;
3028 }
3029 return true;
3030 }
3031
3032
3033 /* Function vect_permute_store_chain.
3034
3035 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3036 a power of 2, generate interleave_high/low stmts to reorder the data
3037 correctly for the stores. Return the final references for stores in
3038 RESULT_CHAIN.
3039
3040 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3041 The input is 4 vectors each containing 8 elements. We assign a number to each
3042 element; the input sequence is:
3043
3044 1st vec: 0 1 2 3 4 5 6 7
3045 2nd vec: 8 9 10 11 12 13 14 15
3046 3rd vec: 16 17 18 19 20 21 22 23
3047 4th vec: 24 25 26 27 28 29 30 31
3048
3049 The output sequence should be:
3050
3051 1st vec: 0 8 16 24 1 9 17 25
3052 2nd vec: 2 10 18 26 3 11 19 27
3053 3rd vec: 4 12 20 28 5 13 21 29
3054 4th vec: 6 14 22 30 7 15 23 31
3055
3056 i.e., we interleave the contents of the four vectors in their order.
3057
3058 We use interleave_high/low instructions to create such output. The input of
3059 each interleave_high/low operation is two vectors:
3060 1st vec 2nd vec
3061 0 1 2 3 4 5 6 7
3062 the even elements of the result vector are obtained left-to-right from the
3063 high/low elements of the first vector. The odd elements of the result are
3064 obtained left-to-right from the high/low elements of the second vector.
3065 The output of interleave_high will be: 0 4 1 5
3066 and of interleave_low: 2 6 3 7
3067
3068
3069 The permutation is done in log2 (LENGTH) stages. In each stage
3070 interleave_high and interleave_low stmts are created for each pair of
3071 vectors in DR_CHAIN, where the first argument is taken from the first
3072 half of DR_CHAIN and the second argument from its second half.
3073 In our example,
3074
3075 I1: interleave_high (1st vec, 3rd vec)
3076 I2: interleave_low (1st vec, 3rd vec)
3077 I3: interleave_high (2nd vec, 4th vec)
3078 I4: interleave_low (2nd vec, 4th vec)
3079
3080 The output for the first stage is:
3081
3082 I1: 0 16 1 17 2 18 3 19
3083 I2: 4 20 5 21 6 22 7 23
3084 I3: 8 24 9 25 10 26 11 27
3085 I4: 12 28 13 29 14 30 15 31
3086
3087 The output of the second stage, i.e. the final result is:
3088
3089 I1: 0 8 16 24 1 9 17 25
3090 I2: 2 10 18 26 3 11 19 27
3091 I3: 4 12 20 28 5 13 21 29
3092 I4: 6 14 22 30 7 15 23 31. */
3093
3094 static bool
3095 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3096 unsigned int length,
3097 tree stmt,
3098 block_stmt_iterator *bsi,
3099 VEC(tree,heap) **result_chain)
3100 {
3101 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3102 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3103 tree scalar_dest, tmp;
3104 int i;
3105 unsigned int j;
3106 VEC(tree,heap) *first, *second;
3107
3108 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3109 first = VEC_alloc (tree, heap, length/2);
3110 second = VEC_alloc (tree, heap, length/2);
3111
3112 /* Check that the operation is supported. */
3113 if (!vect_strided_store_supported (vectype))
3114 return false;
3115
3116 *result_chain = VEC_copy (tree, heap, dr_chain);
3117
3118 for (i = 0; i < exact_log2 (length); i++)
3119 {
3120 for (j = 0; j < length/2; j++)
3121 {
3122 vect1 = VEC_index (tree, dr_chain, j);
3123 vect2 = VEC_index (tree, dr_chain, j+length/2);
3124
3125 /* Create interleaving stmt:
3126 in the case of big endian:
3127 high = interleave_high (vect1, vect2)
3128 and in the case of little endian:
3129 high = interleave_low (vect1, vect2). */
3130 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3131 DECL_GIMPLE_REG_P (perm_dest) = 1;
3132 add_referenced_var (perm_dest);
3133 if (BYTES_BIG_ENDIAN)
3134 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3135 else
3136 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3137 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3138 high = make_ssa_name (perm_dest, perm_stmt);
3139 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3140 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3141 VEC_replace (tree, *result_chain, 2*j, high);
3142
3143 /* Create interleaving stmt:
3144 in the case of big endian:
3145 low = interleave_low (vect1, vect2)
3146 and in the case of little endian:
3147 low = interleave_high (vect1, vect2). */
3148 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3149 DECL_GIMPLE_REG_P (perm_dest) = 1;
3150 add_referenced_var (perm_dest);
3151 if (BYTES_BIG_ENDIAN)
3152 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3153 else
3154 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3155 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3156 low = make_ssa_name (perm_dest, perm_stmt);
3157 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3158 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3159 VEC_replace (tree, *result_chain, 2*j+1, low);
3160 }
3161 dr_chain = VEC_copy (tree, heap, *result_chain);
3162 }
3163 return true;
3164 }
3165
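/* For the smallest case, LENGTH == 2, the loops above run a single stage
   and emit just one pair (modulo the endianness swap noted in the
   comments):

       high = VEC_INTERLEAVE_HIGH_EXPR <vect0, vect1>;
       low  = VEC_INTERLEAVE_LOW_EXPR  <vect0, vect1>;

   with *RESULT_CHAIN becoming { high, low }.  */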
3166
3167 /* Function vectorizable_store.
3168
3169 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3170 can be vectorized.
3171 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3172 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3173 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3174
3175 bool
3176 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3177 {
3178 tree scalar_dest;
3179 tree data_ref;
3180 tree op;
3181 tree vec_oprnd = NULL_TREE;
3182 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3183 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3184 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3185 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3186 enum machine_mode vec_mode;
3187 tree dummy;
3188 enum dr_alignment_support alignment_support_cheme;
3189 ssa_op_iter iter;
3190 def_operand_p def_p;
3191 tree def, def_stmt;
3192 enum vect_def_type dt;
3193 stmt_vec_info prev_stmt_info = NULL;
3194 tree dataref_ptr = NULL_TREE;
3195 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3196 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3197 int j;
3198 tree next_stmt, first_stmt;
3199 bool strided_store = false;
3200 unsigned int group_size, i;
3201 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3202 gcc_assert (ncopies >= 1);
3203
3204 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3205 return false;
3206
3207 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3208 return false;
3209
3210 if (STMT_VINFO_LIVE_P (stmt_info))
3211 {
3212 if (vect_print_dump_info (REPORT_DETAILS))
3213 fprintf (vect_dump, "value used after loop.");
3214 return false;
3215 }
3216
3217 /* Is vectorizable store? */
3218
3219 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3220 return false;
3221
3222 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3223 if (TREE_CODE (scalar_dest) != ARRAY_REF
3224 && TREE_CODE (scalar_dest) != INDIRECT_REF
3225 && !DR_GROUP_FIRST_DR (stmt_info))
3226 return false;
3227
3228 op = GIMPLE_STMT_OPERAND (stmt, 1);
3229 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3230 {
3231 if (vect_print_dump_info (REPORT_DETAILS))
3232 fprintf (vect_dump, "use not simple.");
3233 return false;
3234 }
3235
3236 vec_mode = TYPE_MODE (vectype);
3237 /* FORNOW. In some cases can vectorize even if data-type not supported
3238 (e.g. - array initialization with 0). */
3239 if (mov_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
3240 return false;
3241
3242 if (!STMT_VINFO_DATA_REF (stmt_info))
3243 return false;
3244
3245 if (DR_GROUP_FIRST_DR (stmt_info))
3246 {
3247 strided_store = true;
3248 if (!vect_strided_store_supported (vectype))
3249 return false;
3250 }
3251
3252 if (!vec_stmt) /* transformation not required. */
3253 {
3254 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3255 return true;
3256 }
3257
3258 /** Transform. **/
3259
3260 if (vect_print_dump_info (REPORT_DETAILS))
3261 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3262
3263 if (strided_store)
3264 {
3265 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3266 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3267 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3268
3269 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3270
3271 /* We vectorize all the stmts of the interleaving group when we
3272 reach the last stmt in the group. */
3273 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3274 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
3275 {
3276 *vec_stmt = NULL_TREE;
3277 return true;
3278 }
3279 }
3280 else
3281 {
3282 first_stmt = stmt;
3283 first_dr = dr;
3284 group_size = 1;
3285 }
3286
3287 dr_chain = VEC_alloc (tree, heap, group_size);
3288 oprnds = VEC_alloc (tree, heap, group_size);
3289
3290 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
3291 gcc_assert (alignment_support_cheme);
3292 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
3293
3294 /* In case the vectorization factor (VF) is bigger than the number
3295 of elements that we can fit in a vectype (nunits), we have to generate
3296      more than one vector stmt - i.e., we need to "unroll" the
3297 vector stmt by a factor VF/nunits. For more details see documentation in
3298 vect_get_vec_def_for_copy_stmt. */
3299
3300 /* In case of interleaving (non-unit strided access):
3301
3302 S1: &base + 2 = x2
3303 S2: &base = x0
3304 S3: &base + 1 = x1
3305 S4: &base + 3 = x3
3306
3307      We create vectorized stores starting from the base address (the access of
3308      the first stmt in the chain - S2 in the above example) when the last store
3309      stmt of the chain (S4) is reached:
3310
3311 VS1: &base = vx2
3312 VS2: &base + vec_size*1 = vx0
3313 VS3: &base + vec_size*2 = vx1
3314 VS4: &base + vec_size*3 = vx3
3315
3316 Then permutation statements are generated:
3317
3318 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3319 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3320 ...
3321
3322 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3323 (the order of the data-refs in the output of vect_permute_store_chain
3324 corresponds to the order of scalar stmts in the interleaving chain - see
3325 the documentation of vect_permute_store_chain()).
3326
3327 In case of both multiple types and interleaving, above vector stores and
3328 permutation stmts are created for every copy. The result vector stmts are
3329 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3330 STMT_VINFO_RELATED_STMT for the next copies.
3331 */
3332
3333 prev_stmt_info = NULL;
3334 for (j = 0; j < ncopies; j++)
3335 {
3336 tree new_stmt;
3337 tree ptr_incr;
3338
3339 if (j == 0)
3340 {
3341 /* For interleaved stores we collect vectorized defs for all the
3342 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
3343 as an input to vect_permute_store_chain(), and OPRNDS as an input
3344 to vect_get_vec_def_for_stmt_copy() for the next copy.
3345 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3346 OPRNDS are of size 1. */
3347 next_stmt = first_stmt;
3348 for (i = 0; i < group_size; i++)
3349 {
3350 	      /* Since gaps are not supported for interleaved stores, GROUP_SIZE
3351 		 is the exact number of stmts in the chain. Therefore, NEXT_STMT
3352 		 can't be NULL_TREE. If there is no interleaving, GROUP_SIZE
3353 		 is 1, and only one iteration of the loop will be
3354 		 executed.
3355 gcc_assert (next_stmt);
3356 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
3357 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
3358 VEC_quick_push(tree, dr_chain, vec_oprnd);
3359 VEC_quick_push(tree, oprnds, vec_oprnd);
3360 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3361 }
3362 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
3363 &dummy, &ptr_incr, false,
3364 TREE_TYPE (vec_oprnd));
3365 }
3366 else
3367 {
3368 /* For interleaved stores we created vectorized defs for all the
3369 defs stored in OPRNDS in the previous iteration (previous copy).
3370 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3371 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3372 next copy.
3373 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3374 OPRNDS are of size 1. */
3375 for (i = 0; i < group_size; i++)
3376 {
3377 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
3378 VEC_index (tree, oprnds, i));
3379 VEC_replace(tree, dr_chain, i, vec_oprnd);
3380 VEC_replace(tree, oprnds, i, vec_oprnd);
3381 }
3382 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3383 }
3384
3385 if (strided_store)
3386 {
3387 result_chain = VEC_alloc (tree, heap, group_size);
3388 /* Permute. */
3389 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
3390 &result_chain))
3391 return false;
3392 }
3393
3394 next_stmt = first_stmt;
3395 for (i = 0; i < group_size; i++)
3396 {
3397 /* For strided stores vectorized defs are interleaved in
3398 vect_permute_store_chain(). */
3399 if (strided_store)
3400 vec_oprnd = VEC_index(tree, result_chain, i);
3401
3402 data_ref = build_fold_indirect_ref (dataref_ptr);
3403 /* Arguments are ready. Create the new vector stmt. */
3404 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
3405 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3406
3407 /* Set the VDEFs for the vector pointer. If this virtual def
3408 has a use outside the loop and a loop peel is performed
3409 then the def may be renamed by the peel. Mark it for
3410 renaming so the later use will also be renamed. */
3411 copy_virtual_operands (new_stmt, next_stmt);
3412 if (j == 0)
3413 {
3414 /* The original store is deleted so the same SSA_NAMEs
3415 can be used. */
3416 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
3417 {
3418 SSA_NAME_DEF_STMT (def) = new_stmt;
3419 mark_sym_for_renaming (SSA_NAME_VAR (def));
3420 }
3421
3422 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3423 }
3424 else
3425 {
3426 /* Create new names for all the definitions created by COPY and
3427 add replacement mappings for each new name. */
3428 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
3429 {
3430 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
3431 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
3432 }
3433
3434 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3435 }
3436
3437 prev_stmt_info = vinfo_for_stmt (new_stmt);
3438 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3439 if (!next_stmt)
3440 break;
3441 /* Bump the vector pointer. */
3442 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3443 }
3444 }
3445
3446 return true;
3447 }
3448
3449
3450 /* Function vect_setup_realignment
3451
3452 This function is called when vectorizing an unaligned load using
3453 the dr_unaligned_software_pipeline scheme.
3454 This function generates the following code at the loop prolog:
3455
3456 p = initial_addr;
3457 msq_init = *(floor(p)); # prolog load
3458 realignment_token = call target_builtin;
3459 loop:
3460 msq = phi (msq_init, ---)
3461
3462 The code above sets up a new (vector) pointer, pointing to the first
3463 location accessed by STMT, and a "floor-aligned" load using that pointer.
3464 It also generates code to compute the "realignment-token" (if the relevant
3465 target hook was defined), and creates a phi-node at the loop-header bb
3466 whose arguments are the result of the prolog-load (created by this
3467 function) and the result of a load that takes place in the loop (to be
3468 created by the caller to this function).
3469 The caller to this function uses the phi-result (msq) to create the
3470 realignment code inside the loop, and sets up the missing phi argument,
3471 as follows:
3472
3473 loop:
3474 msq = phi (msq_init, lsq)
3475 lsq = *(floor(p')); # load in loop
3476 result = realign_load (msq, lsq, realignment_token);
3477
3478 Input:
3479 STMT - (scalar) load stmt to be vectorized. This load accesses
3480 a memory location that may be unaligned.
3481 BSI - place where new code is to be inserted.
3482
3483 Output:
3484 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3485 target hook, if defined.
3486 Return value - the result of the loop-header phi node. */
3487
3488 static tree
3489 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
3490 tree *realignment_token)
3491 {
3492 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3493 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3494 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3495 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3496 edge pe = loop_preheader_edge (loop);
3497 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3498 tree vec_dest;
3499 tree init_addr;
3500 tree inc;
3501 tree ptr;
3502 tree data_ref;
3503 tree new_stmt;
3504 basic_block new_bb;
3505 tree msq_init;
3506 tree new_temp;
3507 tree phi_stmt;
3508 tree msq;
3509
3510 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
3511 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3512 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
3513 NULL_TREE);
3514 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3515 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
3516 new_temp = make_ssa_name (vec_dest, new_stmt);
3517 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3518 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
3519 gcc_assert (!new_bb);
3520 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
3521 copy_virtual_operands (new_stmt, stmt);
3522 update_vuses_to_preheader (new_stmt, loop);
3523
3524 /* 2. Create permutation mask, if required, in loop preheader. */
3525 if (targetm.vectorize.builtin_mask_for_load)
3526 {
3527 tree builtin_decl;
3528
3529 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3530 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
3531 vec_dest = vect_create_destination_var (scalar_dest,
3532 TREE_TYPE (new_stmt));
3533 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
3534 new_temp = make_ssa_name (vec_dest, new_stmt);
3535 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3536 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
3537 gcc_assert (!new_bb);
3538 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
3539
3540 /* The result of the CALL_EXPR to this builtin is determined from
3541 the value of the parameter and no global variables are touched
3542 which makes the builtin a "const" function. Requiring the
3543 builtin to have the "const" attribute makes it unnecessary
3544 to call mark_call_clobbered. */
3545 gcc_assert (TREE_READONLY (builtin_decl));
3546 }
3547
3548 /* 3. Create msq = phi <msq_init, lsq> in loop */
3549 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3550 msq = make_ssa_name (vec_dest, NULL_TREE);
3551 phi_stmt = create_phi_node (msq, loop->header);
3552 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3553 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
3554
3555 return msq;
3556 }
3557
3558
3559 /* Function vect_strided_load_supported.
3560
3561    Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3562 and FALSE otherwise. */
3563
3564 static bool
3565 vect_strided_load_supported (tree vectype)
3566 {
3567 optab perm_even_optab, perm_odd_optab;
3568 int mode;
3569
3570 mode = (int) TYPE_MODE (vectype);
3571
3572 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
3573 if (!perm_even_optab)
3574 {
3575 if (vect_print_dump_info (REPORT_DETAILS))
3576 fprintf (vect_dump, "no optab for perm_even.");
3577 return false;
3578 }
3579
3580 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3581 {
3582 if (vect_print_dump_info (REPORT_DETAILS))
3583 fprintf (vect_dump, "perm_even op not supported by target.");
3584 return false;
3585 }
3586
3587 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
3588 if (!perm_odd_optab)
3589 {
3590 if (vect_print_dump_info (REPORT_DETAILS))
3591 fprintf (vect_dump, "no optab for perm_odd.");
3592 return false;
3593 }
3594
3595 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3596 {
3597 if (vect_print_dump_info (REPORT_DETAILS))
3598 fprintf (vect_dump, "perm_odd op not supported by target.");
3599 return false;
3600 }
3601 return true;
3602 }
3603
3604
3605 /* Function vect_permute_load_chain.
3606
3607 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3608 a power of 2, generate extract_even/odd stmts to reorder the input data
3609 correctly. Return the final references for loads in RESULT_CHAIN.
3610
3611 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3612 The input is 4 vectors each containing 8 elements. We assign a number to each
3613 element, the input sequence is:
3614
3615 1st vec: 0 1 2 3 4 5 6 7
3616 2nd vec: 8 9 10 11 12 13 14 15
3617 3rd vec: 16 17 18 19 20 21 22 23
3618 4th vec: 24 25 26 27 28 29 30 31
3619
3620 The output sequence should be:
3621
3622 1st vec: 0 4 8 12 16 20 24 28
3623 2nd vec: 1 5 9 13 17 21 25 29
3624 3rd vec: 2 6 10 14 18 22 26 30
3625 4th vec: 3 7 11 15 19 23 27 31
3626
3627 i.e., the first output vector should contain the first elements of each
3628 interleaving group, etc.
3629
3630 We use extract_even/odd instructions to create such output. The input of each
3631 extract_even/odd operation is two vectors
3632 1st vec 2nd vec
3633 0 1 2 3 4 5 6 7
3634
3635 and the output is the vector of extracted even/odd elements. The output of
3636 extract_even will be: 0 2 4 6
3637 and of extract_odd: 1 3 5 7
3638
3639
3640    The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
3641 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3642 order. In our example,
3643
3644 E1: extract_even (1st vec, 2nd vec)
3645 E2: extract_odd (1st vec, 2nd vec)
3646 E3: extract_even (3rd vec, 4th vec)
3647 E4: extract_odd (3rd vec, 4th vec)
3648
3649 The output for the first stage will be:
3650
3651 E1: 0 2 4 6 8 10 12 14
3652 E2: 1 3 5 7 9 11 13 15
3653 E3: 16 18 20 22 24 26 28 30
3654 E4: 17 19 21 23 25 27 29 31
3655
3656 In order to proceed and create the correct sequence for the next stage (or
3657 for the correct output, if the second stage is the last one, as in our
3658 example), we first put the output of extract_even operation and then the
3659 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3660 The input for the second stage is:
3661
3662 1st vec (E1): 0 2 4 6 8 10 12 14
3663 2nd vec (E3): 16 18 20 22 24 26 28 30
3664 3rd vec (E2): 1 3 5 7 9 11 13 15
3665 4th vec (E4): 17 19 21 23 25 27 29 31
3666
3667 The output of the second stage:
3668
3669 E1: 0 4 8 12 16 20 24 28
3670 E2: 2 6 10 14 18 22 26 30
3671 E3: 1 5 9 13 17 21 25 29
3672 E4: 3 7 11 15 19 23 27 31
3673
3674 And RESULT_CHAIN after reordering:
3675
3676 1st vec (E1): 0 4 8 12 16 20 24 28
3677 2nd vec (E3): 1 5 9 13 17 21 25 29
3678 3rd vec (E2): 2 6 10 14 18 22 26 30
3679 4th vec (E4): 3 7 11 15 19 23 27 31. */
3680
3681 static bool
3682 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3683 unsigned int length,
3684 tree stmt,
3685 block_stmt_iterator *bsi,
3686 VEC(tree,heap) **result_chain)
3687 {
3688 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
3689 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3690 tree tmp;
3691 int i;
3692 unsigned int j;
3693
3694 /* Check that the operation is supported. */
3695 if (!vect_strided_load_supported (vectype))
3696 return false;
3697
3698 *result_chain = VEC_copy (tree, heap, dr_chain);
3699 for (i = 0; i < exact_log2 (length); i++)
3700 {
3701       for (j = 0; j < length; j += 2)
3702 {
3703 first_vect = VEC_index (tree, dr_chain, j);
3704 second_vect = VEC_index (tree, dr_chain, j+1);
3705
3706 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3707 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3708 DECL_GIMPLE_REG_P (perm_dest) = 1;
3709 add_referenced_var (perm_dest);
3710
3711 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
3712 first_vect, second_vect);
3713 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3714
3715 data_ref = make_ssa_name (perm_dest, perm_stmt);
3716 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
3717 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3718 mark_symbols_for_renaming (perm_stmt);
3719
3720 VEC_replace (tree, *result_chain, j/2, data_ref);
3721
3722 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3723 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3724 DECL_GIMPLE_REG_P (perm_dest) = 1;
3725 add_referenced_var (perm_dest);
3726
3727 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
3728 first_vect, second_vect);
3729 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3730 data_ref = make_ssa_name (perm_dest, perm_stmt);
3731 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
3732 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3733 mark_symbols_for_renaming (perm_stmt);
3734
3735 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3736 }
3737 dr_chain = VEC_copy (tree, heap, *result_chain);
3738 }
3739 return true;
3740 }
3741
3742
3743 /* Function vect_transform_strided_load.
3744
3745    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3746    to perform their permutation, and record the resulting vectorized statements
3747    in the stmt_info of the corresponding scalar statements.
3748 */
3749
3750 static bool
3751 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
3752 block_stmt_iterator *bsi)
3753 {
3754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3755 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3756 tree next_stmt, new_stmt;
3757 VEC(tree,heap) *result_chain = NULL;
3758 unsigned int i, gap_count;
3759 tree tmp_data_ref;
3760
3761   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3762      RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
3763      permuted vectors, which are ready for vector computation.  */
3764 result_chain = VEC_alloc (tree, heap, size);
3765 /* Permute. */
3766 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
3767 return false;
3768
3769   /* Put a permuted data-ref in the VECTORIZED_STMT field.
3770      Since we scan the chain starting from its first node, the order of the
3771      statements corresponds to the order of data-refs in RESULT_CHAIN.  */
3772 next_stmt = first_stmt;
3773 gap_count = 1;
3774 for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
3775 {
3776 if (!next_stmt)
3777 break;
3778
3779       /* Skip the gaps. Loads created for the gaps will be removed by the
3780 	 dead code elimination pass later.
3781 	 DR_GROUP_GAP is the number of steps in elements from the previous
3782 	 access (if there is no gap, DR_GROUP_GAP is 1). We skip loads that
3783 	 correspond to the gaps.
3784       */
3785 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3786 {
3787 gap_count++;
3788 continue;
3789 }
3790
3791 while (next_stmt)
3792 {
3793 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3794 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3795 copies, and we put the new vector statement in the first available
3796 RELATED_STMT. */
3797 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3798 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3799 else
3800 {
3801 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3802 tree rel_stmt = STMT_VINFO_RELATED_STMT (
3803 vinfo_for_stmt (prev_stmt));
3804 while (rel_stmt)
3805 {
3806 prev_stmt = rel_stmt;
3807 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3808 }
3809 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
3810 }
3811 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3812 gap_count = 1;
3813 /* If NEXT_STMT accesses the same DR as the previous statement,
3814 put the same TMP_DATA_REF as its vectorized statement; otherwise
3815 get the next data-ref from RESULT_CHAIN. */
3816 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3817 break;
3818 }
3819 }
3820 return true;
3821 }
3822
3823
3824 /* vectorizable_load.
3825
3826    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3827 can be vectorized.
3828 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3829 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3830 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3831
3832 bool
3833 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3834 {
3835 tree scalar_dest;
3836 tree vec_dest = NULL;
3837 tree data_ref = NULL;
3838 tree op;
3839 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3840 stmt_vec_info prev_stmt_info;
3841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3842 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3843 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3844 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3845 tree new_temp;
3846 int mode;
3847 tree new_stmt = NULL_TREE;
3848 tree dummy;
3849   enum dr_alignment_support alignment_support_scheme;
3850 tree dataref_ptr = NULL_TREE;
3851 tree ptr_incr;
3852 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3853 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3854 int i, j, group_size;
3855 tree msq = NULL_TREE, lsq;
3856 tree offset = NULL_TREE;
3857 tree realignment_token = NULL_TREE;
3858 tree phi_stmt = NULL_TREE;
3859 VEC(tree,heap) *dr_chain = NULL;
3860 bool strided_load = false;
3861 tree first_stmt;
3862
3863 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3864 return false;
3865
3866 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3867 return false;
3868
3869 /* FORNOW: not yet supported. */
3870 if (STMT_VINFO_LIVE_P (stmt_info))
3871 {
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "value used after loop.");
3874 return false;
3875 }
3876
3877 /* Is vectorizable load? */
3878 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3879 return false;
3880
3881 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3882 if (TREE_CODE (scalar_dest) != SSA_NAME)
3883 return false;
3884
3885 op = GIMPLE_STMT_OPERAND (stmt, 1);
3886 if (TREE_CODE (op) != ARRAY_REF
3887 && TREE_CODE (op) != INDIRECT_REF
3888 && !DR_GROUP_FIRST_DR (stmt_info))
3889 return false;
3890
3891 if (!STMT_VINFO_DATA_REF (stmt_info))
3892 return false;
3893
3894 mode = (int) TYPE_MODE (vectype);
3895
3896   /* FORNOW. In some cases we can vectorize even if the data type is not
3897      supported (e.g. data copies).  */
3898 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3899 {
3900 if (vect_print_dump_info (REPORT_DETAILS))
3901 fprintf (vect_dump, "Aligned load, but unsupported type.");
3902 return false;
3903 }
3904
3905 /* Check if the load is a part of an interleaving chain. */
3906 if (DR_GROUP_FIRST_DR (stmt_info))
3907 {
3908 strided_load = true;
3909
3910 /* Check if interleaving is supported. */
3911 if (!vect_strided_load_supported (vectype))
3912 return false;
3913 }
3914
3915 if (!vec_stmt) /* transformation not required. */
3916 {
3917 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3918 return true;
3919 }
3920
3921 /** Transform. **/
3922
3923 if (vect_print_dump_info (REPORT_DETAILS))
3924 fprintf (vect_dump, "transform load.");
3925
3926 if (strided_load)
3927 {
3928 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3929 /* Check if the chain of loads is already vectorized. */
3930 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3931 {
3932 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3933 return true;
3934 }
3935 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3936 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3937 dr_chain = VEC_alloc (tree, heap, group_size);
3938 }
3939 else
3940 {
3941 first_stmt = stmt;
3942 first_dr = dr;
3943 group_size = 1;
3944 }
3945
3946   alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3947   gcc_assert (alignment_support_scheme);
3948
3949
3950 /* In case the vectorization factor (VF) is bigger than the number
3951 of elements that we can fit in a vectype (nunits), we have to generate
3952      more than one vector stmt - i.e., we need to "unroll" the
3953 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3954 from one copy of the vector stmt to the next, in the field
3955 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3956 stages to find the correct vector defs to be used when vectorizing
3957 stmts that use the defs of the current stmt. The example below illustrates
3958 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3959 4 vectorized stmts):
3960
3961 before vectorization:
3962 RELATED_STMT VEC_STMT
3963 S1: x = memref - -
3964 S2: z = x + 1 - -
3965
3966 step 1: vectorize stmt S1:
3967 We first create the vector stmt VS1_0, and, as usual, record a
3968 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3969 Next, we create the vector stmt VS1_1, and record a pointer to
3970 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3971 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3972 stmts and pointers:
3973 RELATED_STMT VEC_STMT
3974 VS1_0: vx0 = memref0 VS1_1 -
3975 VS1_1: vx1 = memref1 VS1_2 -
3976 VS1_2: vx2 = memref2 VS1_3 -
3977 VS1_3: vx3 = memref3 - -
3978 S1: x = load - VS1_0
3979 S2: z = x + 1 - -
3980
3981 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3982 information we recorded in RELATED_STMT field is used to vectorize
3983 stmt S2. */
3984
3985 /* In case of interleaving (non-unit strided access):
3986
3987 S1: x2 = &base + 2
3988 S2: x0 = &base
3989 S3: x1 = &base + 1
3990 S4: x3 = &base + 3
3991
3992 Vectorized loads are created in the order of memory accesses
3993 starting from the access of the first stmt of the chain:
3994
3995 VS1: vx0 = &base
3996 VS2: vx1 = &base + vec_size*1
3997 VS3: vx3 = &base + vec_size*2
3998 VS4: vx4 = &base + vec_size*3
3999
4000 Then permutation statements are generated:
4001
4002 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4003 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4004 ...
4005
4006 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4007 (the order of the data-refs in the output of vect_permute_load_chain
4008 corresponds to the order of scalar stmts in the interleaving chain - see
4009 the documentation of vect_permute_load_chain()).
4010 The generation of permutation stmts and recording them in
4011 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4012
4013 In case of both multiple types and interleaving, the vector loads and
4014 permutation stmts above are created for every copy. The result vector stmts
4015 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4016 STMT_VINFO_RELATED_STMT for the next copies. */
4017
4018 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4019 on a target that supports unaligned accesses (dr_unaligned_supported)
4020 we generate the following code:
4021 p = initial_addr;
4022 indx = 0;
4023 loop {
4024 p = p + indx * vectype_size;
4025 vec_dest = *(p);
4026 indx = indx + 1;
4027 }
4028
4029 Otherwise, the data reference is potentially unaligned on a target that
4030 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4031 then generate the following code, in which the data in each iteration is
4032 obtained by two vector loads, one from the previous iteration, and one
4033 from the current iteration:
4034 p1 = initial_addr;
4035 msq_init = *(floor(p1))
4036 p2 = initial_addr + VS - 1;
4037 realignment_token = call target_builtin;
4038 indx = 0;
4039 loop {
4040 p2 = p2 + indx * vectype_size
4041 lsq = *(floor(p2))
4042 vec_dest = realign_load (msq, lsq, realignment_token)
4043 indx = indx + 1;
4044 msq = lsq;
4045 } */
4046
4047   if (alignment_support_scheme == dr_unaligned_software_pipeline)
4048 {
4049 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4050 phi_stmt = SSA_NAME_DEF_STMT (msq);
4051 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4052 }
4053
4054 prev_stmt_info = NULL;
4055 for (j = 0; j < ncopies; j++)
4056 {
4057 /* 1. Create the vector pointer update chain. */
4058 if (j == 0)
4059 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4060 &ptr_incr, false, NULL_TREE);
4061 else
4062 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4063
4064 for (i = 0; i < group_size; i++)
4065 {
4066 /* 2. Create the vector-load in the loop. */
4067 	  switch (alignment_support_scheme)
4068 {
4069 case dr_aligned:
4070 gcc_assert (aligned_access_p (first_dr));
4071 data_ref = build_fold_indirect_ref (dataref_ptr);
4072 break;
4073 case dr_unaligned_supported:
4074 {
4075 int mis = DR_MISALIGNMENT (first_dr);
4076 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4077
4078 gcc_assert (!aligned_access_p (first_dr));
4079 	      tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
4080 data_ref =
4081 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4082 break;
4083 }
4084 case dr_unaligned_software_pipeline:
4085 gcc_assert (!aligned_access_p (first_dr));
4086 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4087 break;
4088 default:
4089 gcc_unreachable ();
4090 }
4091 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4092 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4093 new_temp = make_ssa_name (vec_dest, new_stmt);
4094 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4095 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4096 copy_virtual_operands (new_stmt, stmt);
4097 mark_symbols_for_renaming (new_stmt);
4098
4099 /* 3. Handle explicit realignment if necessary/supported. */
4100 	  if (alignment_support_scheme == dr_unaligned_software_pipeline)
4101 {
4102 /* Create in loop:
4103 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4104 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4105 if (!realignment_token)
4106 realignment_token = dataref_ptr;
4107 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4108 new_stmt =
4109 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4110 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4111 new_temp = make_ssa_name (vec_dest, new_stmt);
4112 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4113 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4114 if (i == group_size - 1 && j == ncopies - 1)
4115 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
4116 msq = lsq;
4117 }
4118 if (strided_load)
4119 VEC_quick_push (tree, dr_chain, new_temp);
4120 if (i < group_size - 1)
4121 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4122 }
4123
4124 if (strided_load)
4125 {
4126 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4127 return false;
4128 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4129 dr_chain = VEC_alloc (tree, heap, group_size);
4130 }
4131 else
4132 {
4133 if (j == 0)
4134 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4135 else
4136 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4137 prev_stmt_info = vinfo_for_stmt (new_stmt);
4138 }
4139 }
4140
4141 return true;
4142 }
4143
4144
4145 /* Function vectorizable_live_operation.
4146
4147 STMT computes a value that is used outside the loop. Check if
4148 it can be supported. */
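
/* Illustrative sketch (an assumed example, not from the original sources):
   a live computation that the FORNOW check below accepts is one whose
   operands are all loop invariants or constants, e.g.

     loop:
       s_1 = a_2 + b_3;    <-- a_2, b_3 defined before the loop
     ...
     use of s_1            <-- s_1 used after the loop

   The scalar stmt can remain in place, unvectorized; the last scalar
   value it computes is the one the use after the loop needs.  */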
4149
4150 bool
4151 vectorizable_live_operation (tree stmt,
4152 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4153 tree *vec_stmt ATTRIBUTE_UNUSED)
4154 {
4155 tree operation;
4156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4157 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4158 int i;
4159 int op_type;
4160 tree op;
4161 tree def, def_stmt;
4162 enum vect_def_type dt;
4163
4164 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4165
4166 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4167 return false;
4168
4169 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4170 return false;
4171
4172 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4173 return false;
4174
4175 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4176 op_type = TREE_OPERAND_LENGTH (operation);
4177
4178 /* FORNOW: support only if all uses are invariant. This means
4179 that the scalar operations can remain in place, unvectorized.
4180 The original last scalar value that they compute will be used. */
4181
4182 for (i = 0; i < op_type; i++)
4183 {
4184 op = TREE_OPERAND (operation, i);
4185 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4186 {
4187 if (vect_print_dump_info (REPORT_DETAILS))
4188 fprintf (vect_dump, "use not simple.");
4189 return false;
4190 }
4191
4192 if (dt != vect_invariant_def && dt != vect_constant_def)
4193 return false;
4194 }
4195
4196 /* No transformation is required for the cases we currently support. */
4197 return true;
4198 }
4199
4200
4201 /* Function vect_is_simple_cond.
4202
4203 Input:
4204 LOOP - the loop that is being vectorized.
4205 COND - Condition that is checked for simple use.
4206
4207    Returns whether COND can be vectorized. Checks whether the
4208    condition operands are supportable using vect_is_simple_use.  */
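
/* For illustration (assumed examples, not from the original sources):
   a condition like "x_1 < y_2", where x_1 and y_2 are SSA_NAMEs with
   simple defs, or "x_1 <= 7", where the rhs is an INTEGER_CST, is
   simple; an operand that is neither an SSA_NAME nor an integer/real
   constant, or a COND that is not a comparison, is rejected.  */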
4209
4210 static bool
4211 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4212 {
4213 tree lhs, rhs;
4214 tree def;
4215 enum vect_def_type dt;
4216
4217 if (!COMPARISON_CLASS_P (cond))
4218 return false;
4219
4220 lhs = TREE_OPERAND (cond, 0);
4221 rhs = TREE_OPERAND (cond, 1);
4222
4223 if (TREE_CODE (lhs) == SSA_NAME)
4224 {
4225 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4226 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4227 return false;
4228 }
4229 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
4230 return false;
4231
4232 if (TREE_CODE (rhs) == SSA_NAME)
4233 {
4234 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4235 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4236 return false;
4237 }
4238 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
4239 return false;
4240
4241 return true;
4242 }
4243
4244 /* vectorizable_condition.
4245
4246    Check if STMT is a conditional modify expression that can be vectorized.
4247 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4248 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4249 at BSI.
4250
4251 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
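
/* Illustrative sketch (an assumed example): the scalar stmt

     x = (a < b) ? c : d;

   is replaced by

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vector defs obtained for the
   corresponding scalar operands.  */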
4252
4253 bool
4254 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4255 {
4256 tree scalar_dest = NULL_TREE;
4257 tree vec_dest = NULL_TREE;
4258 tree op = NULL_TREE;
4259 tree cond_expr, then_clause, else_clause;
4260 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4261 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4262 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4263 tree vec_compare, vec_cond_expr;
4264 tree new_temp;
4265 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4266 enum machine_mode vec_mode;
4267 tree def;
4268 enum vect_def_type dt;
4269 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4270 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4271
4272 gcc_assert (ncopies >= 1);
4273 if (ncopies > 1)
4274 return false; /* FORNOW */
4275
4276 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4277 return false;
4278
4279 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4280 return false;
4281
4282 /* FORNOW: not yet supported. */
4283 if (STMT_VINFO_LIVE_P (stmt_info))
4284 {
4285 if (vect_print_dump_info (REPORT_DETAILS))
4286 fprintf (vect_dump, "value used after loop.");
4287 return false;
4288 }
4289
4290 /* Is vectorizable conditional operation? */
4291 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4292 return false;
4293
4294 op = GIMPLE_STMT_OPERAND (stmt, 1);
4295
4296 if (TREE_CODE (op) != COND_EXPR)
4297 return false;
4298
4299 cond_expr = TREE_OPERAND (op, 0);
4300 then_clause = TREE_OPERAND (op, 1);
4301 else_clause = TREE_OPERAND (op, 2);
4302
4303 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4304 return false;
4305
4306 /* We do not handle two different vector types for the condition
4307 and the values. */
4308 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
4309 return false;
4310
4311 if (TREE_CODE (then_clause) == SSA_NAME)
4312 {
4313 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4314 if (!vect_is_simple_use (then_clause, loop_vinfo,
4315 &then_def_stmt, &def, &dt))
4316 return false;
4317 }
4318 else if (TREE_CODE (then_clause) != INTEGER_CST
4319 && TREE_CODE (then_clause) != REAL_CST)
4320 return false;
4321
4322 if (TREE_CODE (else_clause) == SSA_NAME)
4323 {
4324 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4325 if (!vect_is_simple_use (else_clause, loop_vinfo,
4326 &else_def_stmt, &def, &dt))
4327 return false;
4328 }
4329 else if (TREE_CODE (else_clause) != INTEGER_CST
4330 && TREE_CODE (else_clause) != REAL_CST)
4331 return false;
4332
4333
4334 vec_mode = TYPE_MODE (vectype);
4335
4336 if (!vec_stmt)
4337 {
4338 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4339 return expand_vec_cond_expr_p (op, vec_mode);
4340 }
4341
4342   /** Transform.  **/
4343
4344 /* Handle def. */
4345 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4346 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4347
4348 /* Handle cond expr. */
4349 vec_cond_lhs =
4350 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
4351 vec_cond_rhs =
4352 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
4353 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
4354 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
4355
4356   /* Arguments are ready. Create the new vector stmt.  */
4357 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4358 vec_cond_lhs, vec_cond_rhs);
4359 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4360 vec_compare, vec_then_clause, vec_else_clause);
4361
4362 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
4363 new_temp = make_ssa_name (vec_dest, *vec_stmt);
4364 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
4365 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
4366
4367 return true;
4368 }
4369
4370 /* Function vect_transform_stmt.
4371
4372 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4373
4374 bool
4375 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
4376 {
4377 bool is_store = false;
4378 tree vec_stmt = NULL_TREE;
4379 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4380 tree orig_stmt_in_pattern;
4381 bool done;
4382
4383 switch (STMT_VINFO_TYPE (stmt_info))
4384 {
4385 case type_demotion_vec_info_type:
4386 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
4387 gcc_assert (done);
4388 break;
4389
4390 case type_promotion_vec_info_type:
4391 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
4392 gcc_assert (done);
4393 break;
4394
4395 case type_conversion_vec_info_type:
4396 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
4397 gcc_assert (done);
4398 break;
4399
4400 case induc_vec_info_type:
4401 done = vectorizable_induction (stmt, bsi, &vec_stmt);
4402 gcc_assert (done);
4403 break;
4404
4405 case op_vec_info_type:
4406 done = vectorizable_operation (stmt, bsi, &vec_stmt);
4407 gcc_assert (done);
4408 break;
4409
4410 case assignment_vec_info_type:
4411 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
4412 gcc_assert (done);
4413 break;
4414
4415 case load_vec_info_type:
4416 done = vectorizable_load (stmt, bsi, &vec_stmt);
4417 gcc_assert (done);
4418 break;
4419
4420 case store_vec_info_type:
4421 done = vectorizable_store (stmt, bsi, &vec_stmt);
4422 gcc_assert (done);
4423 if (DR_GROUP_FIRST_DR (stmt_info))
4424 {
4425 	  /* In case of interleaving, the whole chain is vectorized when the
4426 	     last store in the chain is reached. Store stmts before the last
4427 	     one are skipped, and their stmt_vec_info shouldn't be freed
4428 	     meanwhile.  */
4429 *strided_store = true;
4430 if (STMT_VINFO_VEC_STMT (stmt_info))
4431 is_store = true;
4432 }
4433 else
4434 is_store = true;
4435 break;
4436
4437 case condition_vec_info_type:
4438 done = vectorizable_condition (stmt, bsi, &vec_stmt);
4439 gcc_assert (done);
4440 break;
4441
4442 case call_vec_info_type:
4443 done = vectorizable_call (stmt, bsi, &vec_stmt);
4444 break;
4445
4446 case reduc_vec_info_type:
4447 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
4448 gcc_assert (done);
4449 break;
4450
4451 default:
4452 if (!STMT_VINFO_LIVE_P (stmt_info))
4453 {
4454 if (vect_print_dump_info (REPORT_DETAILS))
4455 fprintf (vect_dump, "stmt not supported.");
4456 gcc_unreachable ();
4457 }
4458 }
4459
4460 if (STMT_VINFO_LIVE_P (stmt_info)
4461 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4462 {
4463 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
4464 gcc_assert (done);
4465 }
4466
4467 if (vec_stmt)
4468 {
4469 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4470 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4471 if (orig_stmt_in_pattern)
4472 {
4473 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4474 /* STMT was inserted by the vectorizer to replace a computation idiom.
4475 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4476 computed this idiom. We need to record a pointer to VEC_STMT in
4477 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4478 documentation of vect_pattern_recog. */
4479 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4480 {
4481 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4482 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4483 }
4484 }
4485 }
4486
4487 return is_store;
4488 }
4489
4490
4491 /* This function builds, on the loop preheader edge, ni_name = the number
4492    of iterations the loop executes.  */
4493
4494 static tree
4495 vect_build_loop_niters (loop_vec_info loop_vinfo)
4496 {
4497 tree ni_name, stmt, var;
4498 edge pe;
4499 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4500 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
4501
4502 var = create_tmp_var (TREE_TYPE (ni), "niters");
4503 add_referenced_var (var);
4504 ni_name = force_gimple_operand (ni, &stmt, false, var);
4505
4506 pe = loop_preheader_edge (loop);
4507 if (stmt)
4508 {
4509 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4510 gcc_assert (!new_bb);
4511 }
4512
4513 return ni_name;
4514 }
4515
4516
4517 /* This function generates the following statements:
4518
4519  ni_name = the number of iterations the loop executes
4520 ratio = ni_name / vf
4521 ratio_mult_vf_name = ratio * vf
4522
4523 and places them at the loop preheader edge. */
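
/* Worked example (assumed numbers): with vf = 4 and ni_name = 13,
   ratio = 13 >> 2 = 3 and ratio_mult_vf_name = 3 << 2 = 12, so the
   vectorized loop iterates 3 times, covering 12 of the 13 scalar
   iterations; the remaining 13 - 12 = 1 iteration is left for the
   epilog loop.  */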
4524
4525 static void
4526 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
4527 tree *ni_name_ptr,
4528 tree *ratio_mult_vf_name_ptr,
4529 tree *ratio_name_ptr)
4530 {
4531
4532 edge pe;
4533 basic_block new_bb;
4534 tree stmt, ni_name;
4535 tree var;
4536 tree ratio_name;
4537 tree ratio_mult_vf_name;
4538 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4539 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
4540 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4541 tree log_vf;
4542
4543 pe = loop_preheader_edge (loop);
4544
4545 /* Generate temporary variable that contains
4546 number of iterations loop executes. */
4547
4548 ni_name = vect_build_loop_niters (loop_vinfo);
4549 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
4550
4551 /* Create: ratio = ni >> log2(vf) */
4552
4553 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
4554 if (!is_gimple_val (ratio_name))
4555 {
4556 var = create_tmp_var (TREE_TYPE (ni), "bnd");
4557 add_referenced_var (var);
4558
4559 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
4560 pe = loop_preheader_edge (loop);
4561 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4562 gcc_assert (!new_bb);
4563 }
4564
4565 /* Create: ratio_mult_vf = ratio << log2 (vf). */
4566
4567 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
4568 ratio_name, log_vf);
4569 if (!is_gimple_val (ratio_mult_vf_name))
4570 {
4571 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
4572 add_referenced_var (var);
4573
4574 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
4575 true, var);
4576 pe = loop_preheader_edge (loop);
4577 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4578 gcc_assert (!new_bb);
4579 }
4580
4581 *ni_name_ptr = ni_name;
4582 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
4583 *ratio_name_ptr = ratio_name;
4584
4585 return;
4586 }
4587
4588
4589 /* Function update_vuses_to_preheader.
4590
4591 Input:
4592 STMT - a statement with potential VUSEs.
4593 LOOP - the loop whose preheader will contain STMT.
4594
4595 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
4596 appears to be defined in a VDEF in another statement in a loop.
4597    One such case is when the VUSE is at the dereference of a __restrict__
4598    pointer in a load and the VDEF is at the dereference of a different
4599    __restrict__ pointer in a store. Vectorization may result in
4600    copy_virtual_operands being called to copy the problematic VUSE to a new
4601 statement that is being inserted in the loop preheader. This procedure
4602 is called to change the SSA_NAME in the new statement's VUSE from the
4603 SSA_NAME updated in the loop to the related SSA_NAME available on the
4604 path entering the loop.
4605
4606 When this function is called, we have the following situation:
4607
4608 # vuse <name1>
4609 S1: vload
4610 do {
4611 # name1 = phi < name0 , name2>
4612
4613 # vuse <name1>
4614 S2: vload
4615
4616 # name2 = vdef <name1>
4617 S3: vstore
4618
4619 }while...
4620
4621 Stmt S1 was created in the loop preheader block as part of misaligned-load
4622 handling. This function fixes the name of the vuse of S1 from 'name1' to
4623 'name0'. */
4624
4625 static void
4626 update_vuses_to_preheader (tree stmt, struct loop *loop)
4627 {
4628 basic_block header_bb = loop->header;
4629 edge preheader_e = loop_preheader_edge (loop);
4630 ssa_op_iter iter;
4631 use_operand_p use_p;
4632
4633 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
4634 {
4635 tree ssa_name = USE_FROM_PTR (use_p);
4636 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
4637 tree name_var = SSA_NAME_VAR (ssa_name);
4638 basic_block bb = bb_for_stmt (def_stmt);
4639
4640 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
4641 if (!IS_EMPTY_STMT (def_stmt)
4642 && flow_bb_inside_loop_p (loop, bb))
4643 {
4644 /* If the block containing the statement defining the SSA_NAME
4645 is in the loop then it's necessary to find the definition
4646 outside the loop using the PHI nodes of the header. */
4647 tree phi;
4648 bool updated = false;
4649
4650 for (phi = phi_nodes (header_bb); phi; phi = PHI_CHAIN (phi))
4651 {
4652 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
4653 {
4654 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
4655 updated = true;
4656 break;
4657 }
4658 }
4659 gcc_assert (updated);
4660 }
4661 }
4662 }
4663
4664
4665 /* Function vect_update_ivs_after_vectorizer.
4666
4667 "Advance" the induction variables of LOOP to the value they should take
4668 after the execution of LOOP. This is currently necessary because the
4669 vectorizer does not handle induction variables that are used after the
4670 loop. Such a situation occurs when the last iterations of LOOP are
4671 peeled, because:
4672 1. We introduced new uses after LOOP for IVs that were not originally used
4673 after LOOP: the IVs of LOOP are now used by an epilog loop.
4674 2. LOOP is going to be vectorized; this means that it will iterate N/VF
4675 times, whereas the loop IVs should be bumped N times.
4676
4677 Input:
4678 - LOOP - a loop that is going to be vectorized. The last few iterations
4679 of LOOP were peeled.
4680 - NITERS - the number of iterations that LOOP executes (before it is
4681                vectorized), i.e., the number of times the ivs should be bumped.
4682 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
4683 coming out from LOOP on which there are uses of the LOOP ivs
4684 (this is the path from LOOP->exit to epilog_loop->preheader).
4685
4686 The new definitions of the ivs are placed in LOOP->exit.
4687 The phi args associated with the edge UPDATE_E in the bb
4688 UPDATE_E->dest are updated accordingly.
4689
4690 Assumption 1: Like the rest of the vectorizer, this function assumes
4691 a single loop exit that has a single predecessor.
4692
4693 Assumption 2: The phi nodes in the LOOP header and in update_bb are
4694 organized in the same order.
4695
4696 Assumption 3: The access function of the ivs is simple enough (see
4697 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
4698
4699 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
4700 coming out of LOOP on which the ivs of LOOP are used (this is the path
4701 that leads to the epilog loop; other paths skip the epilog loop). This
4702 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
4703 needs to have its phis updated.
4704 */
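
/* For example (assumed values): for an iv with access function
   {0, +, 4} that should be bumped NITERS = 16 times, the code below
   builds ni = init + niters * step = 0 + 16 * 4 = 64 and uses it as
   the phi argument on edge UPDATE_E.  */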
4705
4706 static void
4707 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
4708 edge update_e)
4709 {
4710 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4711 basic_block exit_bb = single_exit (loop)->dest;
4712 tree phi, phi1;
4713 basic_block update_bb = update_e->dest;
4714
4715 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
4716
4717 /* Make sure there exists a single-predecessor exit bb: */
4718 gcc_assert (single_pred_p (exit_bb));
4719
4720 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
4721 phi && phi1;
4722 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
4723 {
4724 tree access_fn = NULL;
4725 tree evolution_part;
4726 tree init_expr;
4727 tree step_expr;
4728 tree var, stmt, ni, ni_name;
4729 block_stmt_iterator last_bsi;
4730
4731 if (vect_print_dump_info (REPORT_DETAILS))
4732 {
4733 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
4734 print_generic_expr (vect_dump, phi, TDF_SLIM);
4735 }
4736
4737       /* Skip virtual phis.  */
4738 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4739 {
4740 if (vect_print_dump_info (REPORT_DETAILS))
4741 fprintf (vect_dump, "virtual phi. skip.");
4742 continue;
4743 }
4744
4745 /* Skip reduction phis. */
4746 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
4747 {
4748 if (vect_print_dump_info (REPORT_DETAILS))
4749 fprintf (vect_dump, "reduc phi. skip.");
4750 continue;
4751 }
4752
4753 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
4754 gcc_assert (access_fn);
4755 evolution_part =
4756 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
4757 gcc_assert (evolution_part != NULL_TREE);
4758
4759 /* FORNOW: We do not support IVs whose evolution function is a polynomial
4760 of degree >= 2 or exponential. */
4761 gcc_assert (!tree_is_chrec (evolution_part));
4762
4763 step_expr = evolution_part;
4764 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
4765 loop->num));
4766
4767 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
4768 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
4769 fold_convert (TREE_TYPE (init_expr),
4770 niters),
4771 step_expr),
4772 init_expr);
4773
4774 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
4775 add_referenced_var (var);
4776
4777 ni_name = force_gimple_operand (ni, &stmt, false, var);
4778
4779 /* Insert stmt into exit_bb. */
4780 last_bsi = bsi_last (exit_bb);
4781 if (stmt)
4782 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
4783
4784 /* Fix phi expressions in the successor bb. */
4785 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
4786 }
4787 }
4788
4789
4790 /* Function vect_do_peeling_for_loop_bound
4791
4792 Peel the last iterations of the loop represented by LOOP_VINFO.
4793 The peeled iterations form a new epilog loop. Given that the loop now
4794 iterates NITERS times, the new epilog loop iterates
4795 NITERS % VECTORIZATION_FACTOR times.
4796
4797 The original loop will later be made to iterate
4798 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
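
/* Schematically (a sketch of the result, not generated verbatim):

     vector loop:  iterates RATIO = NITERS / VF times
     guard:        skip the epilog if no iterations remain
     epilog loop:  iterates NITERS % VF times, in scalar code

   slpeel_tree_peel_loop_to_edge creates the epilog loop and the guard;
   the IVs of the original loop are then advanced as if RATIO * VF
   iterations had executed.  */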
4799
4800 static void
4801 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
4802 {
4803 tree ni_name, ratio_mult_vf_name;
4804 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4805 struct loop *new_loop;
4806 edge update_e;
4807 basic_block preheader;
4808 int loop_num;
4809 unsigned int th;
4810
4811 if (vect_print_dump_info (REPORT_DETAILS))
4812 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
4813
4814 initialize_original_copy_tables ();
4815
4816 /* Generate the following variables on the preheader of original loop:
4817
4818      ni_name = the number of iterations the original loop executes
4819 ratio = ni_name / vf
4820 ratio_mult_vf_name = ratio * vf */
4821 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
4822 &ratio_mult_vf_name, ratio);
4823
4824 loop_num = loop->num;
4825 /* Threshold for vectorized loop. */
4826 th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) *
4827 LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4828 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
4829 ratio_mult_vf_name, ni_name, false, th);
4830 gcc_assert (new_loop);
4831 gcc_assert (loop_num == loop->num);
4832 #ifdef ENABLE_CHECKING
4833 slpeel_verify_cfg_after_peeling (loop, new_loop);
4834 #endif
4835
4836 /* A guard that controls whether the new_loop is to be executed or skipped
4837 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
4838 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
4839 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
4840 is on the path where the LOOP IVs are used and need to be updated. */
4841
4842 preheader = loop_preheader_edge (new_loop)->src;
4843 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
4844 update_e = EDGE_PRED (preheader, 0);
4845 else
4846 update_e = EDGE_PRED (preheader, 1);
4847
4848 /* Update IVs of original loop as if they were advanced
4849 by ratio_mult_vf_name steps. */
4850 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
4851
4852 /* After peeling we have to reset scalar evolution analyzer. */
4853 scev_reset ();
4854
4855 free_original_copy_tables ();
4856 }
4857
4858
4859 /* Function vect_gen_niters_for_prolog_loop
4860
4861    Generate the number of iterations for the prolog loop peeled from the loop
4862    represented by LOOP_VINFO: the minimum of LOOP_NITERS (the original
4863    iteration count of the loop) and the number of iterations needed to align
4864    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
4865    As a result, after the execution of this prolog loop, DR refers to an aligned location.
4866
4867 The following computation is generated:
4868
4869 If the misalignment of DR is known at compile time:
4870         addr_mis = DR_MISALIGNMENT (dr);
4871 Else, compute address misalignment in bytes:
4872 addr_mis = addr & (vectype_size - 1)
4873
4874 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
4875
4876 (elem_size = element type size; an element is the scalar element
4877 whose type is the inner type of the vectype)
4878
4879 For interleaving,
4880
4881 prolog_niters = min ( LOOP_NITERS ,
4882 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
4883 where group_size is the size of the interleaved group.
4884
4885 The above formulas assume that VF == number of elements in the vector. This
4886 may not hold when there are multiple-types in the loop.
4887 In this case, for some data-references in the loop the VF does not represent
4888 the number of elements that fit in the vector. Therefore, instead of VF we
4889 use TYPE_VECTOR_SUBPARTS. */
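
/* Worked example (assumed numbers): for a V4SI access (nelements = 4,
   element_size = 4, vectype_align = 16) whose start address is
   misaligned by addr_mis = 8 bytes, elem_misalign = 8 / 4 = 2 and
   prolog_niters = (4 - 2) & (4 - 1) = 2: two scalar iterations bring
   the access to a 16-byte boundary.  */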
4890
4891 static tree
4892 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
4893 {
4894 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
4895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4896 tree var, stmt;
4897 tree iters, iters_name;
4898 edge pe;
4899 basic_block new_bb;
4900 tree dr_stmt = DR_STMT (dr);
4901 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
4902 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4903 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
4904 tree niters_type = TREE_TYPE (loop_niters);
4905 int group_size = 1;
4906 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
4907 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
4908
4909 if (DR_GROUP_FIRST_DR (stmt_info))
4910 {
4911 /* For interleaved access element size must be multiplied by the size of
4912 the interleaved group. */
4913 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
4914 DR_GROUP_FIRST_DR (stmt_info)));
4915 element_size *= group_size;
4916 }
4917
4918 pe = loop_preheader_edge (loop);
4919
4920 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
4921 {
4922 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
4923 int elem_misalign = byte_misalign / element_size;
4924
4925 if (vect_print_dump_info (REPORT_DETAILS))
4926 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
4927 iters = build_int_cst (niters_type,
4928                              (nelements - elem_misalign) & (nelements / group_size - 1));
4929 }
4930 else
4931 {
4932 tree new_stmts = NULL_TREE;
4933 tree start_addr =
4934 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
4935 tree ptr_type = TREE_TYPE (start_addr);
4936 tree size = TYPE_SIZE (ptr_type);
4937 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
4938 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align / nelements));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        fold_build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign) & (nelements - 1)  */
      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters)  */
  /* If the loop bound is known at compile time we already verified that it
     is greater than vf; since the misalignment ('iters') is at most vf,
     there's no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on the loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}


/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
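/* For instance (a hedged sketch, with step and niters chosen for the
   example): for a data reference with DR_STEP 4 whose prolog peeled
   NITERS = 2 iterations, the update below adds 2 * 4 = 8 to DR_OFFSET,
   so the recorded access information again describes the first unpeeled
   iteration.  */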

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
  DR_OFFSET (dr) = offset;
}


/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_update_inits_of_drs ===");

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    vect_update_init_of_dr (dr, niters);
}


/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
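/* The resulting structure, sketched under the usual single-exit
   assumptions: a scalar prolog copy of the loop first runs
   niters_of_prolog_loop iterations, and the original loop (which will be
   vectorized) then runs the remaining
   LOOP_VINFO_NITERS - niters_of_prolog_loop iterations, starting from an
   aligned address.  */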

static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop times.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
                                   niters_of_prolog_loop, ni_name, true, 0);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update the number of times the loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset the scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}


/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all the data references (array element references) whose alignment must
   be checked at runtime.

   Input:
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero, and this test can be
        done as a&(n-1) == 0.  For example, for 16-byte vectors the test
        is a&0xf == 0.  */
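/* Concretely (a minimal sketch, assuming two potentially misaligned
   pointers a and b and 16-byte vectors, i.e. LOOP_VINFO_PTR_MASK == 0xf),
   the statements built below compute the equivalent of

     (((intptr_t) a | (intptr_t) b) & 0xf) == 0

   which is true iff both addresses are 16-byte aligned.  */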

static tree
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr_stmt_list)
{
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  tree ref_stmt, tmp;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree ptrsize_zero;

  /* Check that the mask is one less than a power of 2, i.e. that it is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask + 1)) == 0));
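  /* For example (illustrative only): for 16-byte alignment the mask is
     0xf, and the assertion holds since 0xf & 0x10 == 0.  */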

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create the expression (mask & (dr_1 | ... | dr_n)), where dr_i is the
     address of the first vector of the i'th data reference.  */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
    {
      tree new_stmt_list = NULL_TREE;
      tree addr_base;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
                                                        &new_stmt_list,
                                                        NULL_TREE);

      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
                        or_tmp_name, addr_tmp_name);
          or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;
        }
      else
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask");
  add_referenced_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
  and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
                 and_tmp_name, ptrsize_zero);
}


/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop: create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si, next_si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool strided_store;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned.  */
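  /* Schematically (a sketch of the intended result, not literal output):

       if (alignment_check)      <-- vect_create_cond_for_align_checks
         vectorized_loop;
       else
         scalar_loop;  */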

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;
      unsigned prob = 4 * REG_BR_PROB_BASE / 5;

      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loop, cond_expr, &condition_bb,
                            prob, prob, REG_BR_PROB_BASE - prob, true);
      free_original_copy_tables ();

      /** Loop versioning violates an assumption we try to maintain during
         vectorization - that the loop exit block has a single predecessor.
         After versioning, the exit block of both loop versions is the same
         basic block (i.e. it has two predecessors).  Just in order to
         simplify the following transformations in the vectorizer, we fix
         this situation here by adding a new (empty) block on the exit-edge
         of the loop, with the proper loop-exit phis to maintain
         loop-closed-form.  **/

      merge_bb = single_exit (loop)->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (orig_phi = phi_nodes (merge_bb); orig_phi;
           orig_phi = PHI_CHAIN (orig_phi))
        {
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
          SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
        }

      /** end loop-exit-fixes after versioning **/

      update_ssa (TODO_update_ssa);
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_memsyms_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown alignment is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide evenly
     by the vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the
     loop will remain scalar and will compute the remaining (n%VF)
     iterations.  (VF is the vectorization factor.)  */
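  /* A worked instance (numbers chosen purely for illustration): with
     n = 14 and VF = 4, the vectorized loop runs ratio = 14/4 = 3
     iterations, covering 12 scalar iterations, and the scalar epilog
     runs the remaining 14%4 = 2 iterations.  */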

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists of one
     basic block (header + empty latch).  When the vectorizer supports more
     involved loop forms, the order in which the BBs are traversed needs to
     be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;
      tree phi;

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (phi);
          if (!stmt_info)
            continue;
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;

          if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
               != (unsigned HOST_WIDE_INT) vectorization_factor)
              && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple-types.");

          if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "transform phi.");
              vect_transform_stmt (phi, NULL, NULL);
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si);)
        {
          tree stmt = bsi_stmt (si);
          bool is_store;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (stmt);
          gcc_assert (stmt_info);
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              bsi_next (&si);
              continue;
            }

          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
          if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
               != (unsigned HOST_WIDE_INT) vectorization_factor)
              && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple-types.");

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          strided_store = false;
          is_store = vect_transform_stmt (stmt, &si, &strided_store);
          if (is_store)
            {
              stmt_ann_t ann;
              if (DR_GROUP_FIRST_DR (stmt_info))
                {
                  /* Interleaving.  If IS_STORE is TRUE, the vectorization
                     of the interleaving chain was completed - free all the
                     stores in the chain.  */
                  tree next = DR_GROUP_FIRST_DR (stmt_info);
                  tree tmp;
                  stmt_vec_info next_stmt_info;

                  while (next)
                    {
                      next_si = bsi_for_stmt (next);
                      next_stmt_info = vinfo_for_stmt (next);
                      /* Free the attached stmt_vec_info and remove the
                         stmt.  */
                      ann = stmt_ann (next);
                      tmp = DR_GROUP_NEXT_DR (next_stmt_info);
                      free (next_stmt_info);
                      set_stmt_info (ann, NULL);
                      bsi_remove (&next_si, true);
                      next = tmp;
                    }
                  bsi_remove (&si, true);
                  continue;
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
                  ann = stmt_ann (stmt);
                  free (stmt_info);
                  set_stmt_info (ann, NULL);
                  bsi_remove (&si, true);
                  continue;
                }
            }
          bsi_next (&si);
        } /* stmts in BB */
    } /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  mark_set_for_renaming (vect_memsyms_to_rename);

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}