/* Page-extraction residue (not part of this file's code):
   unrelated ChangeLog entry "arm.h (FUNCTION_ARG_ADVANCE): Only adjust
   iwmmxt_nregs if TARGET_IWMMXT_ABI."
   Source path: [gcc.git] / gcc / tree-vect-transform.c  */
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "recog.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46 #include "real.h"
47
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, tree *, bool);
53 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
63 static tree get_initial_def_for_reduction (tree, tree, tree *);
64
65 /* Utility function dealing with loop peeling (not peeling itself). */
66 static void vect_generate_tmps_on_preheader
67 (loop_vec_info, tree *, tree *, tree *);
68 static tree vect_build_loop_niters (loop_vec_info);
69 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71 static void vect_update_init_of_dr (struct data_reference *, tree niters);
72 static void vect_update_inits_of_drs (loop_vec_info, tree);
73 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
74 static void vect_do_peeling_for_loop_bound
75 (loop_vec_info, tree *, struct loops *);
76 static int vect_min_worthwhile_factor (enum tree_code);
77
78
79 /* Function vect_get_new_vect_var.
80
81 Returns a name for a new variable. The current naming scheme appends the
82 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83 the name of vectorizer generated variables, and appends that to NAME if
84 provided. */
85
86 static tree
87 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
88 {
89 const char *prefix;
90 tree new_vect_var;
91
92 switch (var_kind)
93 {
94 case vect_simple_var:
95 prefix = "vect_";
96 break;
97 case vect_scalar_var:
98 prefix = "stmp_";
99 break;
100 case vect_pointer_var:
101 prefix = "vect_p";
102 break;
103 default:
104 gcc_unreachable ();
105 }
106
107 if (name)
108 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
109 else
110 new_vect_var = create_tmp_var (type, prefix);
111
112 return new_vect_var;
113 }
114
115
116 /* Function vect_create_addr_base_for_vector_ref.
117
118 Create an expression that computes the address of the first memory location
119 that will be accessed for a data reference.
120
121 Input:
122 STMT: The statement containing the data reference.
123 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
124 OFFSET: Optional. If supplied, it is be added to the initial address.
125
126 Output:
127 1. Return an SSA_NAME whose value is the address of the memory location of
128 the first vector of the data reference.
129 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
130 these statement(s) which define the returned SSA_NAME.
131
132 FORNOW: We are only handling array accesses with step 1. */
133
134 static tree
135 vect_create_addr_base_for_vector_ref (tree stmt,
136 tree *new_stmt_list,
137 tree offset)
138 {
139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
140 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
141 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
142 tree base_name = build_fold_indirect_ref (data_ref_base);
143 tree ref = DR_REF (dr);
144 tree scalar_type = TREE_TYPE (ref);
145 tree scalar_ptr_type = build_pointer_type (scalar_type);
146 tree vec_stmt;
147 tree new_temp;
148 tree addr_base, addr_expr;
149 tree dest, new_stmt;
150 tree base_offset = unshare_expr (DR_OFFSET (dr));
151 tree init = unshare_expr (DR_INIT (dr));
152
153 /* Create base_offset */
154 base_offset = size_binop (PLUS_EXPR, base_offset, init);
155 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
156 add_referenced_var (dest);
157 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
158 append_to_statement_list_force (new_stmt, new_stmt_list);
159
160 if (offset)
161 {
162 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
163 add_referenced_var (tmp);
164 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
165 DR_STEP (dr));
166 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
167 base_offset, offset);
168 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
169 append_to_statement_list_force (new_stmt, new_stmt_list);
170 }
171
172 /* base + base_offset */
173 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
174 base_offset);
175
176 /* addr_expr = addr_base */
177 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
178 get_name (base_name));
179 add_referenced_var (addr_expr);
180 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
181 new_temp = make_ssa_name (addr_expr, vec_stmt);
182 TREE_OPERAND (vec_stmt, 0) = new_temp;
183 append_to_statement_list_force (vec_stmt, new_stmt_list);
184
185 if (vect_print_dump_info (REPORT_DETAILS))
186 {
187 fprintf (vect_dump, "created ");
188 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
189 }
190 return new_temp;
191 }
192
193
194 /* Function vect_create_data_ref_ptr.
195
196 Create a new pointer to vector type (vp), that points to the first location
197 accessed in the loop by STMT, along with the def-use update chain to
198 appropriately advance the pointer through the loop iterations. Also set
199 aliasing information for the pointer. This vector pointer is used by the
200 callers to this function to create a memory reference expression for vector
201 load/store access.
202
203 Input:
204 1. STMT: a stmt that references memory. Expected to be of the form
205 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
206 2. BSI: block_stmt_iterator where new stmts can be added.
207 3. OFFSET (optional): an offset to be added to the initial address accessed
208 by the data-ref in STMT.
209 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
210 pointing to the initial address.
211
212 Output:
213 1. Declare a new ptr to vector_type, and have it point to the base of the
214 data reference (initial addressed accessed by the data reference).
215 For example, for vector of type V8HI, the following code is generated:
216
217 v8hi *vp;
218 vp = (v8hi *)initial_address;
219
220 if OFFSET is not supplied:
221 initial_address = &a[init];
222 if OFFSET is supplied:
223 initial_address = &a[init + OFFSET];
224
225 Return the initial_address in INITIAL_ADDRESS.
226
227 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
228 update the pointer in each iteration of the loop.
229
230 Return the increment stmt that updates the pointer in PTR_INCR.
231
232 3. Return the pointer. */
233
234 static tree
235 vect_create_data_ref_ptr (tree stmt,
236 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
237 tree offset, tree *initial_address, tree *ptr_incr,
238 bool only_init)
239 {
240 tree base_name;
241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
244 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
245 tree vect_ptr_type;
246 tree vect_ptr;
247 tree tag;
248 tree new_temp;
249 tree vec_stmt;
250 tree new_stmt_list = NULL_TREE;
251 edge pe = loop_preheader_edge (loop);
252 basic_block new_bb;
253 tree vect_ptr_init;
254 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
255
256 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
257
258 if (vect_print_dump_info (REPORT_DETAILS))
259 {
260 tree data_ref_base = base_name;
261 fprintf (vect_dump, "create vector-pointer variable to type: ");
262 print_generic_expr (vect_dump, vectype, TDF_SLIM);
263 if (TREE_CODE (data_ref_base) == VAR_DECL)
264 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
265 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
266 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
267 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
268 fprintf (vect_dump, " vectorizing a record based array ref: ");
269 else if (TREE_CODE (data_ref_base) == SSA_NAME)
270 fprintf (vect_dump, " vectorizing a pointer ref: ");
271 print_generic_expr (vect_dump, base_name, TDF_SLIM);
272 }
273
274 /** (1) Create the new vector-pointer variable: **/
275
276 vect_ptr_type = build_pointer_type (vectype);
277 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
278 get_name (base_name));
279 add_referenced_var (vect_ptr);
280
281
282 /** (2) Add aliasing information to the new vector-pointer:
283 (The points-to info (DR_PTR_INFO) may be defined later.) **/
284
285 tag = DR_MEMTAG (dr);
286 gcc_assert (tag);
287
288 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
289 tag must be created with tag added to its may alias list. */
290 if (!MTAG_P (tag))
291 new_type_alias (vect_ptr, tag, DR_REF (dr));
292 else
293 var_ann (vect_ptr)->symbol_mem_tag = tag;
294
295 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
296
297 /** (3) Calculate the initial address the vector-pointer, and set
298 the vector-pointer to point to it before the loop: **/
299
300 /* Create: (&(base[init_val+offset]) in the loop preheader. */
301 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
302 offset);
303 pe = loop_preheader_edge (loop);
304 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
305 gcc_assert (!new_bb);
306 *initial_address = new_temp;
307
308 /* Create: p = (vectype *) initial_base */
309 vec_stmt = fold_convert (vect_ptr_type, new_temp);
310 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
311 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
312 TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
313 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
314 gcc_assert (!new_bb);
315
316
317 /** (4) Handle the updating of the vector-pointer inside the loop: **/
318
319 if (only_init) /* No update in loop is required. */
320 {
321 /* Copy the points-to information if it exists. */
322 if (DR_PTR_INFO (dr))
323 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
324 return vect_ptr_init;
325 }
326 else
327 {
328 block_stmt_iterator incr_bsi;
329 bool insert_after;
330 tree indx_before_incr, indx_after_incr;
331 tree incr;
332
333 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
334 create_iv (vect_ptr_init,
335 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
336 NULL_TREE, loop, &incr_bsi, insert_after,
337 &indx_before_incr, &indx_after_incr);
338 incr = bsi_stmt (incr_bsi);
339 set_stmt_info (stmt_ann (incr),
340 new_stmt_vec_info (incr, loop_vinfo));
341
342 /* Copy the points-to information if it exists. */
343 if (DR_PTR_INFO (dr))
344 {
345 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
346 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
347 }
348 merge_alias_info (vect_ptr_init, indx_before_incr);
349 merge_alias_info (vect_ptr_init, indx_after_incr);
350 if (ptr_incr)
351 *ptr_incr = incr;
352
353 return indx_before_incr;
354 }
355 }
356
357
358 /* Function bump_vector_ptr
359
360 Increment a pointer (to a vector type) by vector-size. Connect the new
361 increment stmt to the exising def-use update-chain of the pointer.
362
363 The pointer def-use update-chain before this function:
364 DATAREF_PTR = phi (p_0, p_2)
365 ....
366 PTR_INCR: p_2 = DATAREF_PTR + step
367
368 The pointer def-use update-chain after this function:
369 DATAREF_PTR = phi (p_0, p_2)
370 ....
371 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
372 ....
373 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
374
375 Input:
376 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
377 in the loop.
378 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
379 The increment amount across iterations is also expected to be
380 vector_size.
381 BSI - location where the new update stmt is to be placed.
382 STMT - the original scalar memory-access stmt that is being vectorized.
383
384 Output: Return NEW_DATAREF_PTR as illustrated above.
385
386 */
387
388 static tree
389 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
390 tree stmt)
391 {
392 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
393 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
394 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
395 tree vptr_type = TREE_TYPE (dataref_ptr);
396 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
397 tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
398 tree incr_stmt;
399 ssa_op_iter iter;
400 use_operand_p use_p;
401 tree new_dataref_ptr;
402
403 incr_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_var,
404 build2 (PLUS_EXPR, vptr_type, dataref_ptr, update));
405 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
406 TREE_OPERAND (incr_stmt, 0) = new_dataref_ptr;
407 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
408
409 /* Update the vector-pointer's cross-iteration increment. */
410 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
411 {
412 tree use = USE_FROM_PTR (use_p);
413
414 if (use == dataref_ptr)
415 SET_USE (use_p, new_dataref_ptr);
416 else
417 gcc_assert (tree_int_cst_compare (use, update) == 0);
418 }
419
420 /* Copy the points-to information if it exists. */
421 if (DR_PTR_INFO (dr))
422 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
423 merge_alias_info (new_dataref_ptr, dataref_ptr);
424
425 return new_dataref_ptr;
426 }
427
428
429 /* Function vect_create_destination_var.
430
431 Create a new temporary of type VECTYPE. */
432
433 static tree
434 vect_create_destination_var (tree scalar_dest, tree vectype)
435 {
436 tree vec_dest;
437 const char *new_name;
438 tree type;
439 enum vect_var_kind kind;
440
441 kind = vectype ? vect_simple_var : vect_scalar_var;
442 type = vectype ? vectype : TREE_TYPE (scalar_dest);
443
444 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
445
446 new_name = get_name (scalar_dest);
447 if (!new_name)
448 new_name = "var_";
449 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
450 add_referenced_var (vec_dest);
451
452 return vec_dest;
453 }
454
455
456 /* Function vect_init_vector.
457
458 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
459 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
460 used in the vectorization of STMT. */
461
462 static tree
463 vect_init_vector (tree stmt, tree vector_var)
464 {
465 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
467 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
468 tree new_var;
469 tree init_stmt;
470 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
471 tree vec_oprnd;
472 edge pe;
473 tree new_temp;
474 basic_block new_bb;
475
476 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
477 add_referenced_var (new_var);
478
479 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
480 new_temp = make_ssa_name (new_var, init_stmt);
481 TREE_OPERAND (init_stmt, 0) = new_temp;
482
483 pe = loop_preheader_edge (loop);
484 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
485 gcc_assert (!new_bb);
486
487 if (vect_print_dump_info (REPORT_DETAILS))
488 {
489 fprintf (vect_dump, "created new init_stmt: ");
490 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
491 }
492
493 vec_oprnd = TREE_OPERAND (init_stmt, 0);
494 return vec_oprnd;
495 }
496
497
498 /* Function vect_get_vec_def_for_operand.
499
500 OP is an operand in STMT. This function returns a (vector) def that will be
501 used in the vectorized stmt for STMT.
502
503 In the case that OP is an SSA_NAME which is defined in the loop, then
504 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
505
506 In case OP is an invariant or constant, a new stmt that creates a vector def
507 needs to be introduced. */
508
509 static tree
510 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
511 {
512 tree vec_oprnd;
513 tree vec_stmt;
514 tree def_stmt;
515 stmt_vec_info def_stmt_info = NULL;
516 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
517 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
518 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
519 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
520 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
521 tree vec_inv;
522 tree vec_cst;
523 tree t = NULL_TREE;
524 tree def;
525 int i;
526 enum vect_def_type dt;
527 bool is_simple_use;
528
529 if (vect_print_dump_info (REPORT_DETAILS))
530 {
531 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
532 print_generic_expr (vect_dump, op, TDF_SLIM);
533 }
534
535 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
536 gcc_assert (is_simple_use);
537 if (vect_print_dump_info (REPORT_DETAILS))
538 {
539 if (def)
540 {
541 fprintf (vect_dump, "def = ");
542 print_generic_expr (vect_dump, def, TDF_SLIM);
543 }
544 if (def_stmt)
545 {
546 fprintf (vect_dump, " def_stmt = ");
547 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
548 }
549 }
550
551 switch (dt)
552 {
553 /* Case 1: operand is a constant. */
554 case vect_constant_def:
555 {
556 if (scalar_def)
557 *scalar_def = op;
558
559 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
560 if (vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
562
563 for (i = nunits - 1; i >= 0; --i)
564 {
565 t = tree_cons (NULL_TREE, op, t);
566 }
567 vec_cst = build_vector (vectype, t);
568 return vect_init_vector (stmt, vec_cst);
569 }
570
571 /* Case 2: operand is defined outside the loop - loop invariant. */
572 case vect_invariant_def:
573 {
574 if (scalar_def)
575 *scalar_def = def;
576
577 /* Create 'vec_inv = {inv,inv,..,inv}' */
578 if (vect_print_dump_info (REPORT_DETAILS))
579 fprintf (vect_dump, "Create vector_inv.");
580
581 for (i = nunits - 1; i >= 0; --i)
582 {
583 t = tree_cons (NULL_TREE, def, t);
584 }
585
586 /* FIXME: use build_constructor directly. */
587 vec_inv = build_constructor_from_list (vectype, t);
588 return vect_init_vector (stmt, vec_inv);
589 }
590
591 /* Case 3: operand is defined inside the loop. */
592 case vect_loop_def:
593 {
594 if (scalar_def)
595 *scalar_def = def_stmt;
596
597 /* Get the def from the vectorized stmt. */
598 def_stmt_info = vinfo_for_stmt (def_stmt);
599 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
600 gcc_assert (vec_stmt);
601 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
602 return vec_oprnd;
603 }
604
605 /* Case 4: operand is defined by a loop header phi - reduction */
606 case vect_reduction_def:
607 {
608 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
609
610 /* Get the def before the loop */
611 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
612 return get_initial_def_for_reduction (stmt, op, scalar_def);
613 }
614
615 /* Case 5: operand is defined by loop-header phi - induction. */
616 case vect_induction_def:
617 {
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "induction - unsupported.");
620 internal_error ("no support for induction"); /* FORNOW */
621 }
622
623 default:
624 gcc_unreachable ();
625 }
626 }
627
628
629 /* Function vect_get_vec_def_for_stmt_copy
630
631 Return a vector-def for an operand. This function is used when the
632 vectorized stmt to be created (by the caller to this function) is a "copy"
633 created in case the vectorized result cannot fit in one vector, and several
634 copies of the vector-stmt are required. In this case the vector-def is
635 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
636 of the stmt that defines VEC_OPRND.
637 DT is the type of the vector def VEC_OPRND.
638
639 Context:
640 In case the vectorization factor (VF) is bigger than the number
641 of elements that can fit in a vectype (nunits), we have to generate
642 more than one vector stmt to vectorize the scalar stmt. This situation
643 arises when there are multiple data-types operated upon in the loop; the
644 smallest data-type determines the VF, and as a result, when vectorizing
645 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
646 vector stmt (each computing a vector of 'nunits' results, and together
647 computing 'VF' results in each iteration). This function is called when
648 vectorizing such a stmt (e.g. vectorizing S2 in the illusration below, in
649 which VF=16 and nuniti=4, so the number of copies required is 4):
650
651 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
652
653 S1: x = load VS1.0: vx.0 = memref0 VS1.1
654 VS1.1: vx.1 = memref1 VS1.2
655 VS1.2: vx.2 = memref2 VS1.3
656 VS1.3: vx.3 = memref3
657
658 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
659 VSnew.1: vz1 = vx.1 + ... VSnew.2
660 VSnew.2: vz2 = vx.2 + ... VSnew.3
661 VSnew.3: vz3 = vx.3 + ...
662
663 The vectorization of S1 is explained in vectorizable_load.
664 The vectorization of S2:
665 To create the first vector-stmt out of the 4 copies - VSnew.0 -
666 the function 'vect_get_vec_def_for_operand' is called to
667 get the relevant vector-def for each operand of S2. For operand x it
668 returns the vector-def 'vx.0'.
669
670 To create the remaining copies of the vector-stmt (VSnew.j), this
671 function is called to get the relevant vector-def for each operand. It is
672 obtained from the respective VS1.j stmt, which is recorded in the
673 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
674
675 For example, to obtain the vector-def 'vx.1' in order to create the
676 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
677 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
678 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
679 and return its def ('vx.1').
680 Overall, to create the above sequence this function will be called 3 times:
681 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
682 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
683 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
684
685 static tree
686 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
687 {
688 tree vec_stmt_for_operand;
689 stmt_vec_info def_stmt_info;
690
691 if (dt == vect_invariant_def || dt == vect_constant_def)
692 {
693 /* Do nothing; can reuse same def. */ ;
694 return vec_oprnd;
695 }
696
697 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
698 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
699 gcc_assert (def_stmt_info);
700 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
701 gcc_assert (vec_stmt_for_operand);
702 vec_oprnd = TREE_OPERAND (vec_stmt_for_operand, 0);
703
704 return vec_oprnd;
705 }
706
707
708 /* Function vect_finish_stmt_generation.
709
710 Insert a new stmt. */
711
712 static void
713 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
714 block_stmt_iterator *bsi)
715 {
716 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
717 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
718
719 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
720 set_stmt_info (get_stmt_ann (vec_stmt),
721 new_stmt_vec_info (vec_stmt, loop_vinfo));
722
723 if (vect_print_dump_info (REPORT_DETAILS))
724 {
725 fprintf (vect_dump, "add new stmt: ");
726 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
727 }
728
729 /* Make sure bsi points to the stmt that is being vectorized. */
730 gcc_assert (stmt == bsi_stmt (*bsi));
731
732 #ifdef USE_MAPPED_LOCATION
733 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
734 #else
735 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
736 #endif
737 }
738
739
740 #define ADJUST_IN_EPILOG 1
741
742 /* Function get_initial_def_for_reduction
743
744 Input:
745 STMT - a stmt that performs a reduction operation in the loop.
746 INIT_VAL - the initial value of the reduction variable
747
748 Output:
749 SCALAR_DEF - a tree that holds a value to be added to the final result
750 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
751 Return a vector variable, initialized according to the operation that STMT
752 performs. This vector will be used as the initial value of the
753 vector of partial results.
754
755 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
756 add: [0,0,...,0,0]
757 mult: [1,1,...,1,1]
758 min/max: [init_val,init_val,..,init_val,init_val]
759 bit and/or: [init_val,init_val,..,init_val,init_val]
760 and when necessary (e.g. add/mult case) let the caller know
761 that it needs to adjust the result by init_val.
762
763 Option2: Initialize the vector as follows:
764 add: [0,0,...,0,init_val]
765 mult: [1,1,...,1,init_val]
766 min/max: [init_val,init_val,...,init_val]
767 bit and/or: [init_val,init_val,...,init_val]
768 and no adjustments are needed.
769
770 For example, for the following code:
771
772 s = init_val;
773 for (i=0;i<n;i++)
774 s = s + a[i];
775
776 STMT is 's = s + a[i]', and the reduction variable is 's'.
777 For a vector of 4 units, we want to return either [0,0,0,init_val],
778 or [0,0,0,0] and let the caller know that it needs to adjust
779 the result at the end by 'init_val'.
780
781 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
782 TODO: Use some cost-model to estimate which scheme is more profitable.
783 */
784
785 static tree
786 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
787 {
788 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
789 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
790 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
791 int nelements;
792 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
793 tree type = TREE_TYPE (init_val);
794 tree def;
795 tree vec, t = NULL_TREE;
796 bool need_epilog_adjust;
797 int i;
798
799 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
800
801 switch (code)
802 {
803 case WIDEN_SUM_EXPR:
804 case DOT_PROD_EXPR:
805 case PLUS_EXPR:
806 if (INTEGRAL_TYPE_P (type))
807 def = build_int_cst (type, 0);
808 else
809 def = build_real (type, dconst0);
810
811 #ifdef ADJUST_IN_EPILOG
812 /* All the 'nunits' elements are set to 0. The final result will be
813 adjusted by 'init_val' at the loop epilog. */
814 nelements = nunits;
815 need_epilog_adjust = true;
816 #else
817 /* 'nunits - 1' elements are set to 0; The last element is set to
818 'init_val'. No further adjustments at the epilog are needed. */
819 nelements = nunits - 1;
820 need_epilog_adjust = false;
821 #endif
822 break;
823
824 case MIN_EXPR:
825 case MAX_EXPR:
826 def = init_val;
827 nelements = nunits;
828 need_epilog_adjust = false;
829 break;
830
831 default:
832 gcc_unreachable ();
833 }
834
835 for (i = nelements - 1; i >= 0; --i)
836 t = tree_cons (NULL_TREE, def, t);
837
838 if (nelements == nunits - 1)
839 {
840 /* Set the last element of the vector. */
841 t = tree_cons (NULL_TREE, init_val, t);
842 nelements += 1;
843 }
844 gcc_assert (nelements == nunits);
845
846 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
847 vec = build_vector (vectype, t);
848 else
849 vec = build_constructor_from_list (vectype, t);
850
851 if (!need_epilog_adjust)
852 *scalar_def = NULL_TREE;
853 else
854 *scalar_def = init_val;
855
856 return vect_init_vector (stmt, vec);
857 }
858
859
860 /* Function vect_create_epilog_for_reduction
861
862 Create code at the loop-epilog to finalize the result of a reduction
863 computation.
864
865 VECT_DEF is a vector of partial results.
866 REDUC_CODE is the tree-code for the epilog reduction.
867 STMT is the scalar reduction stmt that is being vectorized.
868 REDUCTION_PHI is the phi-node that carries the reduction computation.
869
870 This function:
871 1. Creates the reduction def-use cycle: sets the the arguments for
872 REDUCTION_PHI:
873 The loop-entry argument is the vectorized initial-value of the reduction.
874 The loop-latch argument is VECT_DEF - the vector of partial sums.
875 2. "Reduces" the vector of partial results VECT_DEF into a single result,
876 by applying the operation specified by REDUC_CODE if available, or by
877 other means (whole-vector shifts or a scalar loop).
878 The function also creates a new phi node at the loop exit to preserve
879 loop-closed form, as illustrated below.
880
881 The flow at the entry to this function:
882
883 loop:
884 vec_def = phi <null, null> # REDUCTION_PHI
885 VECT_DEF = vector_stmt # vectorized form of STMT
886 s_loop = scalar_stmt # (scalar) STMT
887 loop_exit:
888 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
889 use <s_out0>
890 use <s_out0>
891
892 The above is transformed by this function into:
893
894 loop:
895 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
896 VECT_DEF = vector_stmt # vectorized form of STMT
897 s_loop = scalar_stmt # (scalar) STMT
898 loop_exit:
899 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
900 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
901 v_out2 = reduce <v_out1>
902 s_out3 = extract_field <v_out2, 0>
903 s_out4 = adjust_result <s_out3>
904 use <s_out4>
905 use <s_out4>
906 */
907
static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree use_stmt;
  tree operation = TREE_OPERAND (stmt, 1);
  int op_type;

  /* By convention the reduction variable is the last operand of the
     (vectorized) reduction operation; its scalar type determines the
     vector type used throughout the epilog.  */
  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
  reduction_op = TREE_OPERAND (operation, op_type-1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle ***/

  /* 1.1 set the loop-entry arg of the reduction-phi: */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi: */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }


  /*** 2. Create epilog code
	  The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop. In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>         # original EXIT_PHI
          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.

	  ***/

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
        v_out1 = phi <v_loop>  */

  exit_bb = loop->single_exit->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
  /* All epilog statements below are inserted, in order, after the start
     of the exit block, each one after the previous (BSI_NEW_STMT).  */
  exit_bsi = bsi_start (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment code
         (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
  scalar_dest = TREE_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  /* The caller passes NUM_TREE_CODES (or larger) as REDUC_CODE to signal
     that no direct reduction operation is available.  */
  if (reduc_code < NUM_TREE_CODES)
    {
      /*** Case 1:  Create:
	   v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
			build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
	shift_code = VEC_RSHIFT_EXPR;
      else
	have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
	 emulating the operation via tree-vect-generic, we don't want
	 to use it.  Only the first round of the reduction is likely
	 to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
	 tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
	have_whole_vector_shift = false;
      else
	{
	  optab optab = optab_for_tree_code (code, vectype);
	  if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
	    have_whole_vector_shift = false;
	}

      if (have_whole_vector_shift)
        {
	  /*** Case 2: Create:
	     for (offset = VS/2; offset >= element_size; offset/=2)
	        {
	          Create:  va' = vec_shift <va, offset>
	          Create:  va = vop <va, va'>
	        }  */

	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "Reduce using vector shifts");

	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  new_temp = PHI_RESULT (new_phi);

	  /* Halve the live width of the partial-sums vector each round,
	     until a single element's worth of bits remains.  */
	  for (bit_offset = vec_size_in_bits/2;
	       bit_offset >= element_bitsize;
	       bit_offset /= 2)
	    {
	      /* NOTE: this BITPOS intentionally shadows the outer variable;
		 it holds the shift amount for this round only.  */
	      tree bitpos = size_int (bit_offset);

	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		    build2 (shift_code, vectype, new_temp, bitpos));
	      new_name = make_ssa_name (vec_dest, epilog_stmt);
	      TREE_OPERAND (epilog_stmt, 0) = new_name;
	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		    build2 (code, vectype, new_name, new_temp));
	      new_temp = make_ssa_name (vec_dest, epilog_stmt);
	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
	    }

	  extract_scalar_result = true;
	}
      else
        {
	  tree rhs;

	  /*** Case 3: Create:
	     s = extract_field <v_out2, 0>
	     for (offset = element_size;
		  offset < vector_size;
		  offset += element_size;)
	       {
	         Create:  s' = extract_field <v_out2, offset>
	         Create:  s = op <s, s'>
	       }  */

	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "Reduce using scalar code. ");

	  vec_temp = PHI_RESULT (new_phi);
	  vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
	  /* Extract element 0 to seed the scalar accumulator.  */
	  rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
			 bitsize_zero_node);
	  BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
	  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
	  TREE_OPERAND (epilog_stmt, 0) = new_temp;
	  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

	  /* Fold each remaining element into the accumulator.  Steps 1 and 2
	     are combined here, so no separate extraction is needed below.  */
	  for (bit_offset = element_bitsize;
	       bit_offset < vec_size_in_bits;
	       bit_offset += element_bitsize)
	    {
	      /* NOTE: this BITPOS intentionally shadows the outer variable.  */
	      tree bitpos = bitsize_int (bit_offset);
	      tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
				 bitpos);

	      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
				    rhs);
	      new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
	      TREE_OPERAND (epilog_stmt, 0) = new_name;
	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
				build2 (code, scalar_type, new_name, new_temp));
	      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
	    }

	  extract_scalar_result = false;
	}
    }

  /* 2.4  Extract the final scalar result.  Create:
         s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "extract scalar result");

      /* On big-endian targets the scalar result lives in the last element
	 of the vector; on little-endian in the first (bitpos 0).  */
      if (BYTES_BIG_ENDIAN)
	bitpos = size_binop (MULT_EXPR,
		       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
		       TYPE_SIZE (scalar_type));
      else
	bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.5 Adjust the final result by the initial value of the reduction
	 variable. (When such adjustment is not needed, then
	 'scalar_initial_def' is zero).

	 Create:
	 s_out4 = scalar_expr <s_out3, scalar_initial_def>  */

  if (scalar_initial_def)
    {
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
		      build2 (code, scalar_type, new_temp, scalar_initial_def));
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.6 Replace uses of s_out0 with uses of s_out3/s_out4 (held in
	 NEW_TEMP).  */

  /* Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  exit_phi = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
	{
	  exit_phi = USE_STMT (use_p);
	  break;
	}
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (exit_phi);
  /* Replace the uses:  */
  orig_name = PHI_RESULT (exit_phi);
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
      SET_USE (use_p, new_temp);
}
1233
1234
1235 /* Function vectorizable_reduction.
1236
1237 Check if STMT performs a reduction operation that can be vectorized.
1238 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1239 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1240 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1241
1242 This function also handles reduction idioms (patterns) that have been
1243 recognized in advance during vect_pattern_recog. In this case, STMT may be
1244 of this form:
1245 X = pattern_expr (arg0, arg1, ..., X)
1246    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1247 sequence that had been detected and replaced by the pattern-stmt (STMT).
1248
1249 In some cases of reduction patterns, the type of the reduction variable X is
1250 different than the type of the other arguments of STMT.
1251 In such cases, the vectype that is used when transforming STMT into a vector
1252 stmt is different than the vectype that is used to determine the
1253 vectorization factor, because it consists of a different number of elements
1254 than the actual number of elements that are being operated upon in parallel.
1255
1256 For example, consider an accumulation of shorts into an int accumulator.
1257 On some targets it's possible to vectorize this pattern operating on 8
1258 shorts at a time (hence, the vectype for purposes of determining the
1259 vectorization factor should be V8HI); on the other hand, the vectype that
1260 is used to create the vector form is actually V4SI (the type of the result).
1261
1262 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1263 indicates what is the actual level of parallelism (V8HI in the example), so
1264 that the right vectorization factor would be derived. This vectype
1265 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1266 be used to create the vectorized stmt. The right vectype for the vectorized
1267 stmt is obtained from the type of the result X:
1268 get_vectype_for_scalar_type (TREE_TYPE (X))
1269
1270 This means that, contrary to "regular" reductions (or "regular" stmts in
1271 general), the following equation:
1272 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1273 does *NOT* necessarily hold for reduction patterns. */
1274
1275 bool
1276 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1277 {
1278 tree vec_dest;
1279 tree scalar_dest;
1280 tree op;
1281 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1282 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1283 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1284 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1285 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1286 tree operation;
1287 enum tree_code code, orig_code, epilog_reduc_code = 0;
1288 enum machine_mode vec_mode;
1289 int op_type;
1290 optab optab, reduc_optab;
1291 tree new_temp = NULL_TREE;
1292 tree def, def_stmt;
1293 enum vect_def_type dt;
1294 tree new_phi;
1295 tree scalar_type;
1296 bool is_simple_use;
1297 tree orig_stmt;
1298 stmt_vec_info orig_stmt_info;
1299 tree expr = NULL_TREE;
1300 int i;
1301 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1302 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1303 stmt_vec_info prev_stmt_info;
1304 tree reduc_def;
1305 tree new_stmt = NULL_TREE;
1306 int j;
1307
1308 gcc_assert (ncopies >= 1);
1309
1310 /* 1. Is vectorizable reduction? */
1311
1312 /* Not supportable if the reduction variable is used in the loop. */
1313 if (STMT_VINFO_RELEVANT_P (stmt_info))
1314 return false;
1315
1316 if (!STMT_VINFO_LIVE_P (stmt_info))
1317 return false;
1318
1319 /* Make sure it was already recognized as a reduction computation. */
1320 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1321 return false;
1322
1323 /* 2. Has this been recognized as a reduction pattern?
1324
1325 Check if STMT represents a pattern that has been recognized
1326 in earlier analysis stages. For stmts that represent a pattern,
1327 the STMT_VINFO_RELATED_STMT field records the last stmt in
1328 the original sequence that constitutes the pattern. */
1329
1330 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1331 if (orig_stmt)
1332 {
1333 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1334 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1335 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1336 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1337 }
1338
1339 /* 3. Check the operands of the operation. The first operands are defined
1340 inside the loop body. The last operand is the reduction variable,
1341 which is defined by the loop-header-phi. */
1342
1343 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1344
1345 operation = TREE_OPERAND (stmt, 1);
1346 code = TREE_CODE (operation);
1347 op_type = TREE_CODE_LENGTH (code);
1348 if (op_type != binary_op && op_type != ternary_op)
1349 return false;
1350 scalar_dest = TREE_OPERAND (stmt, 0);
1351 scalar_type = TREE_TYPE (scalar_dest);
1352
1353 /* All uses but the last are expected to be defined in the loop.
1354 The last use is the reduction variable. */
1355 for (i = 0; i < op_type-1; i++)
1356 {
1357 op = TREE_OPERAND (operation, i);
1358 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1359 gcc_assert (is_simple_use);
1360 gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1361 dt == vect_constant_def);
1362 }
1363
1364 op = TREE_OPERAND (operation, i);
1365 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1366 gcc_assert (is_simple_use);
1367 gcc_assert (dt == vect_reduction_def);
1368 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1369 if (orig_stmt)
1370 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1371 else
1372 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1373
1374 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1375 return false;
1376
1377 /* 4. Supportable by target? */
1378
1379 /* 4.1. check support for the operation in the loop */
1380 optab = optab_for_tree_code (code, vectype);
1381 if (!optab)
1382 {
1383 if (vect_print_dump_info (REPORT_DETAILS))
1384 fprintf (vect_dump, "no optab.");
1385 return false;
1386 }
1387 vec_mode = TYPE_MODE (vectype);
1388 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1389 {
1390 if (vect_print_dump_info (REPORT_DETAILS))
1391 fprintf (vect_dump, "op not supported by target.");
1392 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1393 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1394 < vect_min_worthwhile_factor (code))
1395 return false;
1396 if (vect_print_dump_info (REPORT_DETAILS))
1397 fprintf (vect_dump, "proceeding using word mode.");
1398 }
1399
1400 /* Worthwhile without SIMD support? */
1401 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1402 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1403 < vect_min_worthwhile_factor (code))
1404 {
1405 if (vect_print_dump_info (REPORT_DETAILS))
1406 fprintf (vect_dump, "not worthwhile without SIMD support.");
1407 return false;
1408 }
1409
1410 /* 4.2. Check support for the epilog operation.
1411
1412 If STMT represents a reduction pattern, then the type of the
1413 reduction variable may be different than the type of the rest
1414 of the arguments. For example, consider the case of accumulation
1415 of shorts into an int accumulator; The original code:
1416 S1: int_a = (int) short_a;
1417 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
1418
1419 was replaced with:
1420 STMT: int_acc = widen_sum <short_a, int_acc>
1421
1422 This means that:
1423 1. The tree-code that is used to create the vector operation in the
1424 epilog code (that reduces the partial results) is not the
1425 tree-code of STMT, but is rather the tree-code of the original
1426 stmt from the pattern that STMT is replacing. I.e, in the example
1427 above we want to use 'widen_sum' in the loop, but 'plus' in the
1428 epilog.
1429 2. The type (mode) we use to check available target support
1430 for the vector operation to be created in the *epilog*, is
1431 determined by the type of the reduction variable (in the example
1432 above we'd check this: plus_optab[vect_int_mode]).
1433 However the type (mode) we use to check available target support
1434 for the vector operation to be created *inside the loop*, is
1435 determined by the type of the other arguments to STMT (in the
1436 example we'd check this: widen_sum_optab[vect_short_mode]).
1437
1438 This is contrary to "regular" reductions, in which the types of all
1439 the arguments are the same as the type of the reduction variable.
1440 For "regular" reductions we can therefore use the same vector type
1441 (and also the same tree-code) when generating the epilog code and
1442 when generating the code inside the loop. */
1443
1444 if (orig_stmt)
1445 {
1446 /* This is a reduction pattern: get the vectype from the type of the
1447 reduction variable, and get the tree-code from orig_stmt. */
1448 orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1449 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1450 vec_mode = TYPE_MODE (vectype);
1451 }
1452 else
1453 {
1454 /* Regular reduction: use the same vectype and tree-code as used for
1455 the vector code inside the loop can be used for the epilog code. */
1456 orig_code = code;
1457 }
1458
1459 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1460 return false;
1461 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1462 if (!reduc_optab)
1463 {
1464 if (vect_print_dump_info (REPORT_DETAILS))
1465 fprintf (vect_dump, "no optab for reduction.");
1466 epilog_reduc_code = NUM_TREE_CODES;
1467 }
1468 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1469 {
1470 if (vect_print_dump_info (REPORT_DETAILS))
1471 fprintf (vect_dump, "reduc op not supported by target.");
1472 epilog_reduc_code = NUM_TREE_CODES;
1473 }
1474
1475 if (!vec_stmt) /* transformation not required. */
1476 {
1477 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1478 return true;
1479 }
1480
1481 /** Transform. **/
1482
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "transform reduction.");
1485
1486 /* Create the destination vector */
1487 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1488
1489 /* Create the reduction-phi that defines the reduction-operand. */
1490 new_phi = create_phi_node (vec_dest, loop->header);
1491
1492 /* In case the vectorization factor (VF) is bigger than the number
1493 of elements that we can fit in a vectype (nunits), we have to generate
1494 more than one vector stmt - i.e - we need to "unroll" the
1495 vector stmt by a factor VF/nunits. For more details see documentation
1496 in vectorizable_operation. */
1497
1498 prev_stmt_info = NULL;
1499 for (j = 0; j < ncopies; j++)
1500 {
1501 /* Handle uses. */
1502 if (j == 0)
1503 {
1504 op = TREE_OPERAND (operation, 0);
1505 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1506 if (op_type == ternary_op)
1507 {
1508 op = TREE_OPERAND (operation, 1);
1509 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1510 }
1511
1512 /* Get the vector def for the reduction variable from the phi node */
1513 reduc_def = PHI_RESULT (new_phi);
1514 }
1515 else
1516 {
1517 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
1518 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
1519 if (op_type == ternary_op)
1520 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
1521
1522 /* Get the vector def for the reduction variable from the vectorized
1523 reduction operation generated in the previous iteration (j-1) */
1524 reduc_def = TREE_OPERAND (new_stmt ,0);
1525 }
1526
1527 /* Arguments are ready. create the new vector stmt. */
1528
1529 if (op_type == binary_op)
1530 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
1531 else
1532 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1533 reduc_def);
1534 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
1535 new_temp = make_ssa_name (vec_dest, new_stmt);
1536 TREE_OPERAND (new_stmt, 0) = new_temp;
1537 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1538
1539 if (j == 0)
1540 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1541 else
1542 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1543 prev_stmt_info = vinfo_for_stmt (new_stmt);
1544 }
1545
1546 /* Finalize the reduction-phi (set it's arguments) and create the
1547 epilog reduction code. */
1548 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1549 return true;
1550 }
1551
1552
1553 /* Function vectorizable_assignment.
1554
1555 Check if STMT performs an assignment (copy) that can be vectorized.
1556 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1557 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1558 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1559
1560 bool
1561 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1562 {
1563 tree vec_dest;
1564 tree scalar_dest;
1565 tree op;
1566 tree vec_oprnd;
1567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1569 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1570 tree new_temp;
1571 tree def, def_stmt;
1572 enum vect_def_type dt;
1573 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1574 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1575
1576 gcc_assert (ncopies >= 1);
1577 if (ncopies > 1)
1578 return false; /* FORNOW */
1579
1580 /* Is vectorizable assignment? */
1581 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1582 return false;
1583
1584 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1585
1586 if (TREE_CODE (stmt) != MODIFY_EXPR)
1587 return false;
1588
1589 scalar_dest = TREE_OPERAND (stmt, 0);
1590 if (TREE_CODE (scalar_dest) != SSA_NAME)
1591 return false;
1592
1593 op = TREE_OPERAND (stmt, 1);
1594 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1595 {
1596 if (vect_print_dump_info (REPORT_DETAILS))
1597 fprintf (vect_dump, "use not simple.");
1598 return false;
1599 }
1600
1601 if (!vec_stmt) /* transformation not required. */
1602 {
1603 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1604 return true;
1605 }
1606
1607 /** Transform. **/
1608 if (vect_print_dump_info (REPORT_DETAILS))
1609 fprintf (vect_dump, "transform assignment.");
1610
1611 /* Handle def. */
1612 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1613
1614 /* Handle use. */
1615 op = TREE_OPERAND (stmt, 1);
1616 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1617
1618 /* Arguments are ready. create the new vector stmt. */
1619 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1620 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1621 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1622 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1623
1624 return true;
1625 }
1626
1627
1628 /* Function vect_min_worthwhile_factor.
1629
1630 For a loop where we could vectorize the operation indicated by CODE,
1631 return the minimum vectorization factor that makes it worthwhile
1632 to use generic vectors. */
1633 static int
1634 vect_min_worthwhile_factor (enum tree_code code)
1635 {
1636 switch (code)
1637 {
1638 case PLUS_EXPR:
1639 case MINUS_EXPR:
1640 case NEGATE_EXPR:
1641 return 4;
1642
1643 case BIT_AND_EXPR:
1644 case BIT_IOR_EXPR:
1645 case BIT_XOR_EXPR:
1646 case BIT_NOT_EXPR:
1647 return 2;
1648
1649 default:
1650 return INT_MAX;
1651 }
1652 }
1653
1654
1655 /* Function vectorizable_operation.
1656
1657 Check if STMT performs a binary or unary operation that can be vectorized.
1658 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1659 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1660 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1661
1662 bool
1663 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1664 {
1665 tree vec_dest;
1666 tree scalar_dest;
1667 tree operation;
1668 tree op0, op1 = NULL;
1669 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1670 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1671 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1672 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1673 enum tree_code code;
1674 enum machine_mode vec_mode;
1675 tree new_temp;
1676 int op_type;
1677 optab optab;
1678 int icode;
1679 enum machine_mode optab_op2_mode;
1680 tree def, def_stmt;
1681 enum vect_def_type dt0, dt1;
1682 tree new_stmt;
1683 stmt_vec_info prev_stmt_info;
1684 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1685 int nunits_out;
1686 tree vectype_out;
1687 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1688 int j;
1689
1690 gcc_assert (ncopies >= 1);
1691
1692 /* Is STMT a vectorizable binary/unary operation? */
1693 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1694 return false;
1695
1696 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1697
1698 if (STMT_VINFO_LIVE_P (stmt_info))
1699 {
1700 /* FORNOW: not yet supported. */
1701 if (vect_print_dump_info (REPORT_DETAILS))
1702 fprintf (vect_dump, "value used after loop.");
1703 return false;
1704 }
1705
1706 if (TREE_CODE (stmt) != MODIFY_EXPR)
1707 return false;
1708
1709 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1710 return false;
1711
1712 scalar_dest = TREE_OPERAND (stmt, 0);
1713 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1714 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1715 if (nunits_out != nunits_in)
1716 return false;
1717
1718 operation = TREE_OPERAND (stmt, 1);
1719 code = TREE_CODE (operation);
1720 optab = optab_for_tree_code (code, vectype);
1721
1722 /* Support only unary or binary operations. */
1723 op_type = TREE_CODE_LENGTH (code);
1724 if (op_type != unary_op && op_type != binary_op)
1725 {
1726 if (vect_print_dump_info (REPORT_DETAILS))
1727 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1728 return false;
1729 }
1730
1731 op0 = TREE_OPERAND (operation, 0);
1732 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
1733 {
1734 if (vect_print_dump_info (REPORT_DETAILS))
1735 fprintf (vect_dump, "use not simple.");
1736 return false;
1737 }
1738
1739 if (op_type == binary_op)
1740 {
1741 op1 = TREE_OPERAND (operation, 1);
1742 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
1743 {
1744 if (vect_print_dump_info (REPORT_DETAILS))
1745 fprintf (vect_dump, "use not simple.");
1746 return false;
1747 }
1748 }
1749
1750 /* Supportable by target? */
1751 if (!optab)
1752 {
1753 if (vect_print_dump_info (REPORT_DETAILS))
1754 fprintf (vect_dump, "no optab.");
1755 return false;
1756 }
1757 vec_mode = TYPE_MODE (vectype);
1758 icode = (int) optab->handlers[(int) vec_mode].insn_code;
1759 if (icode == CODE_FOR_nothing)
1760 {
1761 if (vect_print_dump_info (REPORT_DETAILS))
1762 fprintf (vect_dump, "op not supported by target.");
1763 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1764 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1765 < vect_min_worthwhile_factor (code))
1766 return false;
1767 if (vect_print_dump_info (REPORT_DETAILS))
1768 fprintf (vect_dump, "proceeding using word mode.");
1769 }
1770
1771 /* Worthwhile without SIMD support? */
1772 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1773 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1774 < vect_min_worthwhile_factor (code))
1775 {
1776 if (vect_print_dump_info (REPORT_DETAILS))
1777 fprintf (vect_dump, "not worthwhile without SIMD support.");
1778 return false;
1779 }
1780
1781 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1782 {
1783 /* FORNOW: not yet supported. */
1784 if (!VECTOR_MODE_P (vec_mode))
1785 return false;
1786
1787 /* Invariant argument is needed for a vector shift
1788 by a scalar shift operand. */
1789 optab_op2_mode = insn_data[icode].operand[2].mode;
1790 if (! (VECTOR_MODE_P (optab_op2_mode)
1791 || dt1 == vect_constant_def
1792 || dt1 == vect_invariant_def))
1793 {
1794 if (vect_print_dump_info (REPORT_DETAILS))
1795 fprintf (vect_dump, "operand mode requires invariant argument.");
1796 return false;
1797 }
1798 }
1799
1800 if (!vec_stmt) /* transformation not required. */
1801 {
1802 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1803 return true;
1804 }
1805
1806 /** Transform. **/
1807
1808 if (vect_print_dump_info (REPORT_DETAILS))
1809 fprintf (vect_dump, "transform binary/unary operation.");
1810
1811 /* Handle def. */
1812 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1813
1814 /* In case the vectorization factor (VF) is bigger than the number
1815 of elements that we can fit in a vectype (nunits), we have to generate
1816 more than one vector stmt - i.e - we need to "unroll" the
1817 vector stmt by a factor VF/nunits. In doing so, we record a pointer
1818 from one copy of the vector stmt to the next, in the field
1819 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
1820 stages to find the correct vector defs to be used when vectorizing
1821 stmts that use the defs of the current stmt. The example below illustrates
1822 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
1823 4 vectorized stmts):
1824
1825 before vectorization:
1826 RELATED_STMT VEC_STMT
1827 S1: x = memref - -
1828 S2: z = x + 1 - -
1829
1830 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
1831 there):
1832 RELATED_STMT VEC_STMT
1833 VS1_0: vx0 = memref0 VS1_1 -
1834 VS1_1: vx1 = memref1 VS1_2 -
1835 VS1_2: vx2 = memref2 VS1_3 -
1836 VS1_3: vx3 = memref3 - -
1837 S1: x = load - VS1_0
1838 S2: z = x + 1 - -
1839
1840 step2: vectorize stmt S2 (done here):
1841 To vectorize stmt S2 we first need to find the relevant vector
1842 def for the first operand 'x'. This is, as usual, obtained from
1843 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
1844 that defines 'x' (S1). This way we find the stmt VS1_0, and the
1845 relevant vector def 'vx0'. Having found 'vx0' we can generate
1846 the vector stmt VS2_0, and as usual, record it in the
1847 STMT_VINFO_VEC_STMT of stmt S2.
1848 When creating the second copy (VS2_1), we obtain the relevant vector
1849 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
1850 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
1851 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
1852 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
1853 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
1854 chain of stmts and pointers:
1855 RELATED_STMT VEC_STMT
1856 VS1_0: vx0 = memref0 VS1_1 -
1857 VS1_1: vx1 = memref1 VS1_2 -
1858 VS1_2: vx2 = memref2 VS1_3 -
1859 VS1_3: vx3 = memref3 - -
1860 S1: x = load - VS1_0
1861 VS2_0: vz0 = vx0 + v1 VS2_1 -
1862 VS2_1: vz1 = vx1 + v1 VS2_2 -
1863 VS2_2: vz2 = vx2 + v1 VS2_3 -
1864 VS2_3: vz3 = vx3 + v1 - -
1865 S2: z = x + 1 - VS2_0 */
1866
1867 prev_stmt_info = NULL;
1868 for (j = 0; j < ncopies; j++)
1869 {
1870 /* Handle uses. */
1871 if (j == 0)
1872 {
1873 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1874 if (op_type == binary_op)
1875 {
1876 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1877 {
1878 /* Vector shl and shr insn patterns can be defined with
1879 scalar operand 2 (shift operand). In this case, use
1880 constant or loop invariant op1 directly, without
1881 extending it to vector mode first. */
1882 optab_op2_mode = insn_data[icode].operand[2].mode;
1883 if (!VECTOR_MODE_P (optab_op2_mode))
1884 {
1885 if (vect_print_dump_info (REPORT_DETAILS))
1886 fprintf (vect_dump, "operand 1 using scalar mode.");
1887 vec_oprnd1 = op1;
1888 }
1889 }
1890 if (!vec_oprnd1)
1891 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1892 }
1893 }
1894 else
1895 {
1896 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
1897 if (op_type == binary_op)
1898 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
1899 }
1900
1901 /* Arguments are ready. create the new vector stmt. */
1902
1903 if (op_type == binary_op)
1904 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest,
1905 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1906 else
1907 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest,
1908 build1 (code, vectype, vec_oprnd0));
1909 new_temp = make_ssa_name (vec_dest, new_stmt);
1910 TREE_OPERAND (new_stmt, 0) = new_temp;
1911 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1912
1913 if (j == 0)
1914 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1915 else
1916 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1917 prev_stmt_info = vinfo_for_stmt (new_stmt);
1918 }
1919
1920 return true;
1921 }
1922
1923
/* Function vectorizable_type_demotion

   Check if STMT performs a binary or unary operation that involves
   type demotion (a conversion to a narrower type), and if it can be
   vectorized.  Because the demoted elements are half as wide, one
   output vector is packed from TWO input vectors (VEC_PACK_MOD_EXPR).
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
                            tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0;
  tree vec_oprnd0=NULL, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  tree new_temp;
  tree def, def_stmt;
  enum vect_def_type dt0;
  tree new_stmt;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j;
  tree expr;
  tree vectype_in;
  tree scalar_type;
  optab optab;
  enum machine_mode vec_mode;

  /* Is STMT a vectorizable type-demotion operation?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  /* Only plain conversions are handled.  */
  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  if (code != NOP_EXPR && code != CONVERT_EXPR)
    return false;

  op0 = TREE_OPERAND (operation, 0);
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  /* FORNOW: support only the case in which the output vector holds
     exactly twice as many elements as the input vector, i.e. the
     element width is exactly halved.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  vectype_out = get_vectype_for_scalar_type (scalar_type);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in != nunits_out / 2) /* FORNOW */
    return false;

  /* Number of output vector stmts needed to cover the whole
     vectorization factor.  */
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  gcc_assert (ncopies >= 1);

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* Supportable by target?  */
  code = VEC_PACK_MOD_EXPR;
  optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
  if (!optab)
    return false;

  vec_mode = TYPE_MODE (vectype_in);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
             ncopies);

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  Each output stmt packs two
     consecutive input vector defs, so we walk the def chain two
     steps per copy.  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          /* NOTE(review): DT is a placeholder here; the operand's real
             def-type is DT0 — presumably vect_get_vec_def_for_stmt_copy
             only needs it to detect invariant defs.  Confirm.  */
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd0);
        }
      else
        {
          /* Continue the chain from the second def of the previous
             copy: each copy consumes the next two vector defs.  */
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
          vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
        }

      /* Arguments are ready. Create the new vector stmt.  */
      expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
      new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      /* Record the first copy in VEC_STMT; link later copies through
         RELATED_STMT so users of this def can find all copies.  */
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}
2073
2074
2075 /* Function vect_gen_widened_results_half
2076
2077 Create a vector stmt whose code, type, number of arguments, and result
2078 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2079 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2080 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2081 needs to be created (DECL is a function-decl of a target-builtin).
2082 STMT is the original scalar stmt that we are vectorizing. */
2083
2084 static tree
2085 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2086 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2087 tree vec_dest, block_stmt_iterator *bsi,
2088 tree stmt)
2089 {
2090 tree vec_params;
2091 tree expr;
2092 tree new_stmt;
2093 tree new_temp;
2094 tree sym;
2095 ssa_op_iter iter;
2096
2097 /* Generate half of the widened result: */
2098 if (code == CALL_EXPR)
2099 {
2100 /* Target specific support */
2101 vec_params = build_tree_list (NULL_TREE, vec_oprnd0);
2102 if (op_type == binary_op)
2103 vec_params = tree_cons (NULL_TREE, vec_oprnd1, vec_params);
2104 expr = build_function_call_expr (decl, vec_params);
2105 }
2106 else
2107 {
2108 /* Generic support */
2109 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2110 if (op_type == binary_op)
2111 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2112 else
2113 expr = build1 (code, vectype, vec_oprnd0);
2114 }
2115 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
2116 new_temp = make_ssa_name (vec_dest, new_stmt);
2117 TREE_OPERAND (new_stmt, 0) = new_temp;
2118 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2119
2120 if (code == CALL_EXPR)
2121 {
2122 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2123 {
2124 if (TREE_CODE (sym) == SSA_NAME)
2125 sym = SSA_NAME_VAR (sym);
2126 mark_sym_for_renaming (sym);
2127 }
2128 }
2129
2130 return new_stmt;
2131 }
2132
2133
2134 /* Function vectorizable_type_promotion
2135
2136 Check if STMT performs a binary or unary operation that involves
2137 type promotion, and if it can be vectorized.
2138 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2139 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2140 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2141
2142 bool
2143 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
2144 tree *vec_stmt)
2145 {
2146 tree vec_dest;
2147 tree scalar_dest;
2148 tree operation;
2149 tree op0, op1 = NULL;
2150 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2152 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2153 enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing;
2154 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2155 int op_type;
2156 tree def, def_stmt;
2157 enum vect_def_type dt0, dt1;
2158 tree new_stmt;
2159 stmt_vec_info prev_stmt_info;
2160 int nunits_in;
2161 int nunits_out;
2162 tree vectype_out;
2163 int ncopies;
2164 int j;
2165 tree vectype_in;
2166
2167 /* Is STMT a vectorizable type-promotion operation? */
2168
2169 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2170 return false;
2171
2172 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2173
2174 if (STMT_VINFO_LIVE_P (stmt_info))
2175 {
2176 /* FORNOW: not yet supported. */
2177 if (vect_print_dump_info (REPORT_DETAILS))
2178 fprintf (vect_dump, "value used after loop.");
2179 return false;
2180 }
2181
2182 if (TREE_CODE (stmt) != MODIFY_EXPR)
2183 return false;
2184
2185 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2186 return false;
2187
2188 operation = TREE_OPERAND (stmt, 1);
2189 code = TREE_CODE (operation);
2190 if (code != NOP_EXPR && code != WIDEN_MULT_EXPR)
2191 return false;
2192
2193 op0 = TREE_OPERAND (operation, 0);
2194 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2195 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2196 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2197 gcc_assert (ncopies >= 1);
2198
2199 scalar_dest = TREE_OPERAND (stmt, 0);
2200 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2201 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2202 if (nunits_out != nunits_in / 2) /* FORNOW */
2203 return false;
2204
2205 /* Check the operands of the operation. */
2206 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2207 {
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "use not simple.");
2210 return false;
2211 }
2212
2213 op_type = TREE_CODE_LENGTH (code);
2214 if (op_type == binary_op)
2215 {
2216 op1 = TREE_OPERAND (operation, 1);
2217 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2218 {
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "use not simple.");
2221 return false;
2222 }
2223 }
2224
2225 /* Supportable by target? */
2226 if (!supportable_widening_operation (code, stmt, vectype_in,
2227 &decl1, &decl2, &code1, &code2))
2228 return false;
2229
2230 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2231
2232 if (!vec_stmt) /* transformation not required. */
2233 {
2234 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2235 return true;
2236 }
2237
2238 /** Transform. **/
2239
2240 if (vect_print_dump_info (REPORT_DETAILS))
2241 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2242 ncopies);
2243
2244 /* Handle def. */
2245 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2246
2247 /* In case the vectorization factor (VF) is bigger than the number
2248 of elements that we can fit in a vectype (nunits), we have to generate
2249 more than one vector stmt - i.e - we need to "unroll" the
2250 vector stmt by a factor VF/nunits. */
2251
2252 prev_stmt_info = NULL;
2253 for (j = 0; j < ncopies; j++)
2254 {
2255 /* Handle uses. */
2256 if (j == 0)
2257 {
2258 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2259 if (op_type == binary_op)
2260 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2261 }
2262 else
2263 {
2264 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2265 if (op_type == binary_op)
2266 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2267 }
2268
2269 /* Arguments are ready. Create the new vector stmt. We are creating
2270 two vector defs because the widened result does not fit in one vector.
2271 The vectorized stmt can be expressed as a call to a taregt builtin,
2272 or a using a tree-code. */
2273 /* Generate first half of the widened result: */
2274 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
2275 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2276 if (j == 0)
2277 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2278 else
2279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2280 prev_stmt_info = vinfo_for_stmt (new_stmt);
2281
2282 /* Generate second half of the widened result: */
2283 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
2284 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2285 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2286 prev_stmt_info = vinfo_for_stmt (new_stmt);
2287
2288 }
2289
2290 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2291 return true;
2292 }
2293
2294
2295 /* Function vectorizable_store.
2296
2297 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2298 can be vectorized.
2299 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2300 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2301 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2302
2303 bool
2304 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2305 {
2306 tree scalar_dest;
2307 tree data_ref;
2308 tree op;
2309 tree vec_oprnd = NULL_TREE;
2310 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2311 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2312 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2314 enum machine_mode vec_mode;
2315 tree dummy;
2316 enum dr_alignment_support alignment_support_cheme;
2317 ssa_op_iter iter;
2318 def_operand_p def_p;
2319 tree def, def_stmt;
2320 enum vect_def_type dt;
2321 stmt_vec_info prev_stmt_info;
2322 tree dataref_ptr = NULL_TREE;
2323 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2324 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2325 int j;
2326
2327 gcc_assert (ncopies >= 1);
2328
2329 /* Is vectorizable store? */
2330
2331 if (TREE_CODE (stmt) != MODIFY_EXPR)
2332 return false;
2333
2334 scalar_dest = TREE_OPERAND (stmt, 0);
2335 if (TREE_CODE (scalar_dest) != ARRAY_REF
2336 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2337 return false;
2338
2339 op = TREE_OPERAND (stmt, 1);
2340 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2341 {
2342 if (vect_print_dump_info (REPORT_DETAILS))
2343 fprintf (vect_dump, "use not simple.");
2344 return false;
2345 }
2346
2347 vec_mode = TYPE_MODE (vectype);
2348 /* FORNOW. In some cases can vectorize even if data-type not supported
2349 (e.g. - array initialization with 0). */
2350 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2351 return false;
2352
2353 if (!STMT_VINFO_DATA_REF (stmt_info))
2354 return false;
2355
2356
2357 if (!vec_stmt) /* transformation not required. */
2358 {
2359 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2360 return true;
2361 }
2362
2363 /** Transform. **/
2364
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2367
2368 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2369 gcc_assert (alignment_support_cheme);
2370 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2371
2372 /* In case the vectorization factor (VF) is bigger than the number
2373 of elements that we can fit in a vectype (nunits), we have to generate
2374 more than one vector stmt - i.e - we need to "unroll" the
2375 vector stmt by a factor VF/nunits. For more details see documentation in
2376 vect_get_vec_def_for_copy_stmt. */
2377
2378 prev_stmt_info = NULL;
2379 for (j = 0; j < ncopies; j++)
2380 {
2381 tree new_stmt;
2382 tree ptr_incr;
2383
2384 if (j == 0)
2385 {
2386 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2387 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy,
2388 &ptr_incr, false);
2389 }
2390 else
2391 {
2392 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2393 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2394 }
2395
2396 /* Arguments are ready. create the new vector stmt. */
2397 data_ref = build_fold_indirect_ref (dataref_ptr);
2398 new_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd);
2399 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2400
2401 /* Set the V_MAY_DEFS for the vector pointer. If this virtual def has a
2402 use outside the loop and a loop peel is performed then the def may be
2403 renamed by the peel. Mark it for renaming so the later use will also
2404 be renamed. */
2405 copy_virtual_operands (new_stmt, stmt);
2406 if (j == 0)
2407 {
2408 /* The original store is deleted so the same SSA_NAMEs can be used.
2409 */
2410 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
2411 {
2412 SSA_NAME_DEF_STMT (def) = new_stmt;
2413 mark_sym_for_renaming (SSA_NAME_VAR (def));
2414 }
2415
2416 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2417 }
2418 else
2419 {
2420 /* Create new names for all the definitions created by COPY and
2421 add replacement mappings for each new name. */
2422 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VMAYDEF)
2423 {
2424 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
2425 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
2426 }
2427
2428 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2429 }
2430
2431 prev_stmt_info = vinfo_for_stmt (new_stmt);
2432 }
2433
2434 return true;
2435 }
2436
2437
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_unaligned_software_pipeline scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
      msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
    loop:
      msq = phi (msq_init, ---)

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:

    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT - (scalar) load stmt to be vectorized. This load accesses
          a memory location that may be unaligned.
   BSI - place where new code is to be inserted.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.
   Return value - the result of the loop-header phi node.  */

static tree
vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
                        tree *realignment_token)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  tree scalar_dest = TREE_OPERAND (stmt, 0);
  tree vec_dest;
  tree init_addr;
  tree inc;   /* NOTE(review): set by vect_create_data_ref_ptr below but
                 never read here -- presumably only needed by other
                 callers of that function.  */
  tree ptr;
  tree data_ref;
  tree new_stmt;
  basic_block new_bb;
  tree msq_init;
  tree new_temp;
  tree phi_stmt;
  tree msq;

  /* 1. Create msq_init = *(floor(p1)) in the loop preheader.  The
     ALIGN_INDIRECT_REF rounds the pointer down to an aligned address.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true);
  data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  TREE_OPERAND (new_stmt, 0) = new_temp;
  /* Inserting on the preheader edge must not have required splitting
     the edge into a new basic block.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
  gcc_assert (!new_bb);
  msq_init = TREE_OPERAND (new_stmt, 0);
  copy_virtual_operands (new_stmt, stmt);
  update_vuses_to_preheader (new_stmt, loop);

  /* 2. Create permutation mask, if required, in loop preheader.  */
  if (targetm.vectorize.builtin_mask_for_load)
    {
      tree builtin_decl;
      tree params = build_tree_list (NULL_TREE, init_addr);

      vec_dest = vect_create_destination_var (scalar_dest,
                                              TREE_TYPE (new_stmt));
      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = build_function_call_expr (builtin_decl, params);
      new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      *realignment_token = TREE_OPERAND (new_stmt, 0);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  /* 3. Create msq = phi <msq_init, lsq> in loop.  Only the preheader
     argument is set here; the caller adds the latch argument (lsq).  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest, NULL_TREE);
  phi_stmt = create_phi_node (msq, loop->header);
  SSA_NAME_DEF_STMT (msq) = phi_stmt;
  add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));

  return msq;
}
2546
2547
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree new_stmt;
  tree dummy;
  enum dr_alignment_support alignment_support_cheme;
  tree dataref_ptr = NULL_TREE;
  tree ptr_incr;   /* Pointer-update stmt, set when j == 0 and reused by
                      all later copies -- must live across the loop.  */
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  int j;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  tree phi_stmt = NULL_TREE;

  /* Is vectorizable load? */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt. The example below illustrates
     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
     4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3. This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_unaligned_software_pipeline) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }
  */

  if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Emit the prolog load/mask computation and the loop-header phi;
         OFFSET makes the in-loop pointer point to p2 = addr + VS - 1
         (see the scheme above).  */
      msq = vect_setup_realignment (stmt, bsi, &realignment_token);
      phi_stmt = SSA_NAME_DEF_STMT (msq);
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector pointer update chain.  */
      if (j == 0)
        dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset,
                                                &dummy, &ptr_incr, false);
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);

      /* 2. Create the vector-load in the loop.  The form of the memory
         reference depends on the alignment scheme.  */
      switch (alignment_support_cheme)
        {
        case dr_aligned:
          gcc_assert (aligned_access_p (dr));
          data_ref = build_fold_indirect_ref (dataref_ptr);
          break;
        case dr_unaligned_supported:
          {
            /* The misalignment (in bits, if known) is attached to the
               MISALIGNED_INDIRECT_REF as a hint for expansion.  */
            int mis = DR_MISALIGNMENT (dr);
            tree tmis = (mis == -1 ? size_zero_node : size_int (mis));

            gcc_assert (!aligned_access_p (dr));
            tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
            data_ref =
              build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
            break;
          }
        case dr_unaligned_software_pipeline:
          gcc_assert (!aligned_access_p (dr));
          data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
          break;
        default:
          gcc_unreachable ();
        }
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      copy_virtual_operands (new_stmt, stmt);
      mark_new_vars_to_rename (new_stmt);

      /* 3. Handle explicit realignment if necessary/supported.  */
      if (alignment_support_cheme == dr_unaligned_software_pipeline)
        {
          /* Create in loop:
             <vec_dest = realign_load (msq, lsq, realignment_token)>  */
          lsq = TREE_OPERAND (new_stmt, 0);
          if (!realignment_token)
            realignment_token = dataref_ptr;
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_stmt =
            build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
          new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          TREE_OPERAND (new_stmt, 0) = new_temp;
          vect_finish_stmt_generation (stmt, new_stmt, bsi);
          /* The last copy's lsq feeds the loop-header phi through the
             latch edge (see vect_setup_realignment).  */
          if (j == ncopies - 1)
            add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
          msq = lsq;
        }

      /* Record the first copy in VEC_STMT; chain later copies through
         RELATED_STMT (see the big comment above).  */
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  return true;
}
2781
2782
2783 /* Function vectorizable_live_operation.
2784
2785 STMT computes a value that is used outside the loop. Check if
2786 it can be supported. */
2787
2788 bool
2789 vectorizable_live_operation (tree stmt,
2790 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2791 tree *vec_stmt ATTRIBUTE_UNUSED)
2792 {
2793 tree operation;
2794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2795 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2796 int i;
2797 enum tree_code code;
2798 int op_type;
2799 tree op;
2800 tree def, def_stmt;
2801 enum vect_def_type dt;
2802
2803 if (!STMT_VINFO_LIVE_P (stmt_info))
2804 return false;
2805
2806 if (TREE_CODE (stmt) != MODIFY_EXPR)
2807 return false;
2808
2809 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2810 return false;
2811
2812 operation = TREE_OPERAND (stmt, 1);
2813 code = TREE_CODE (operation);
2814
2815 op_type = TREE_CODE_LENGTH (code);
2816
2817 /* FORNOW: support only if all uses are invariant. This means
2818 that the scalar operations can remain in place, unvectorized.
2819 The original last scalar value that they compute will be used. */
2820
2821 for (i = 0; i < op_type; i++)
2822 {
2823 op = TREE_OPERAND (operation, i);
2824 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2825 {
2826 if (vect_print_dump_info (REPORT_DETAILS))
2827 fprintf (vect_dump, "use not simple.");
2828 return false;
2829 }
2830
2831 if (dt != vect_invariant_def && dt != vect_constant_def)
2832 return false;
2833 }
2834
2835 /* No transformation is required for the cases we currently support. */
2836 return true;
2837 }
2838
2839
2840 /* Function vect_is_simple_cond.
2841
2842 Input:
2843 LOOP - the loop that is being vectorized.
2844 COND - Condition that is checked for simple use.
2845
2846 Returns whether a COND can be vectorized. Checks whether
2847 condition operands are supportable using vec_is_simple_use. */
2848
2849 static bool
2850 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2851 {
2852 tree lhs, rhs;
2853 tree def;
2854 enum vect_def_type dt;
2855
2856 if (!COMPARISON_CLASS_P (cond))
2857 return false;
2858
2859 lhs = TREE_OPERAND (cond, 0);
2860 rhs = TREE_OPERAND (cond, 1);
2861
2862 if (TREE_CODE (lhs) == SSA_NAME)
2863 {
2864 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2865 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2866 return false;
2867 }
2868 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2869 return false;
2870
2871 if (TREE_CODE (rhs) == SSA_NAME)
2872 {
2873 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2874 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2875 return false;
2876 }
2877 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
2878 return false;
2879
2880 return true;
2881 }
2882
2883 /* vectorizable_condition.
2884
2885 Check if STMT is conditional modify expression that can be vectorized.
2886 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2887 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
2888 at BSI.
2889
2890 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2891
bool
vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree op = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree def;
  enum vect_def_type dt;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* Generating more than one vector copy of the stmt is not yet
     supported here.  */
  gcc_assert (ncopies >= 1);
  if (ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* Only "lhs = COND_EXPR" assignments are handled.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  op = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (op) != COND_EXPR)
    return false;

  cond_expr = TREE_OPERAND (op, 0);
  then_clause = TREE_OPERAND (op, 1);
  else_clause = TREE_OPERAND (op, 2);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
    return false;

  /* We do not handle two different vector types for the condition
     and the values.  */
  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
    return false;

  /* The THEN clause must be an SSA name with a simple use, or an
     integer/real constant.  */
  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST)
    return false;

  /* Likewise for the ELSE clause.  */
  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST)
    return false;


  vec_mode = TYPE_MODE (vectype);

  /* Analysis phase (no VEC_STMT requested): record the stmt type and
     check that the target can expand a VEC_COND_EXPR in this mode.  */
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (op, vec_mode);
    }

  /* Transform */

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr: get vector defs for both comparison operands and
     for the two value clauses.  */
  vec_cond_lhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
  vec_cond_rhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);

  /* Arguments are ready.  Create the new vector stmt.  */
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
                        vec_cond_lhs, vec_cond_rhs);
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                          vec_compare, vec_then_clause, vec_else_clause);

  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
3006
3007 /* Function vect_transform_stmt.
3008
3009 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3010
bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
{
  bool is_store = false;
  tree vec_stmt = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree orig_stmt_in_pattern;
  bool done;

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      /* Dispatch on the stmt kind recorded during the analysis phase.
         Each vectorizable_* routine must succeed here (asserted), since
         analysis already accepted the stmt.  */
      switch (STMT_VINFO_TYPE (stmt_info))
        {
        case type_demotion_vec_info_type:
          done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case type_promotion_vec_info_type:
          done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case op_vec_info_type:
          done = vectorizable_operation (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case assignment_vec_info_type:
          done = vectorizable_assignment (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case load_vec_info_type:
          done = vectorizable_load (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        case store_vec_info_type:
          done = vectorizable_store (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          /* Remember that STMT is a store; this is the return value.  */
          is_store = true;
          break;

        case condition_vec_info_type:
          done = vectorizable_condition (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        default:
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "stmt not supported.");
          gcc_unreachable ();
        }

      /* Record the generated vector stmt in STMT's stmt_info.  */
      gcc_assert (vec_stmt);
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
      if (orig_stmt_in_pattern)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
            {
              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);

              /* STMT was inserted by the vectorizer to replace a computation
                 idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original
                 sequence that computed this idiom.  We need to record a pointer
                 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
                 detail in the documentation of vect_pattern_recog.  */

              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
            }
        }
    }

  /* A stmt whose value is used after the loop needs extra handling;
     reductions have their own routine, anything else goes through
     vectorizable_live_operation.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      switch (STMT_VINFO_TYPE (stmt_info))
        {
        case reduc_vec_info_type:
          done = vectorizable_reduction (stmt, bsi, &vec_stmt);
          gcc_assert (done);
          break;

        default:
          done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
          gcc_assert (done);
        }
    }

  return is_store;
}
3104
3105
3106 /* This function builds ni_name = number of iterations loop executes
3107 on the loop preheader. */
3108
3109 static tree
3110 vect_build_loop_niters (loop_vec_info loop_vinfo)
3111 {
3112 tree ni_name, stmt, var;
3113 edge pe;
3114 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3115 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3116
3117 var = create_tmp_var (TREE_TYPE (ni), "niters");
3118 add_referenced_var (var);
3119 ni_name = force_gimple_operand (ni, &stmt, false, var);
3120
3121 pe = loop_preheader_edge (loop);
3122 if (stmt)
3123 {
3124 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3125 gcc_assert (!new_bb);
3126 }
3127
3128 return ni_name;
3129 }
3130
3131
3132 /* This function generates the following statements:
3133
3134 ni_name = number of iterations loop executes
3135 ratio = ni_name / vf
3136 ratio_mult_vf_name = ratio * vf
3137
3138 and places them at the loop preheader edge. */
3139
3140 static void
3141 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3142 tree *ni_name_ptr,
3143 tree *ratio_mult_vf_name_ptr,
3144 tree *ratio_name_ptr)
3145 {
3146
3147 edge pe;
3148 basic_block new_bb;
3149 tree stmt, ni_name;
3150 tree var;
3151 tree ratio_name;
3152 tree ratio_mult_vf_name;
3153 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3154 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3155 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3156 tree log_vf;
3157
3158 pe = loop_preheader_edge (loop);
3159
3160 /* Generate temporary variable that contains
3161 number of iterations loop executes. */
3162
3163 ni_name = vect_build_loop_niters (loop_vinfo);
3164 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
3165
3166 /* Create: ratio = ni >> log2(vf) */
3167
3168 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3169 add_referenced_var (var);
3170 ratio_name = make_ssa_name (var, NULL_TREE);
3171 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3172 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3173 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3174
3175 pe = loop_preheader_edge (loop);
3176 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3177 gcc_assert (!new_bb);
3178
3179 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3180
3181 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3182 add_referenced_var (var);
3183 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3184 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3185 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3186 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3187
3188 pe = loop_preheader_edge (loop);
3189 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3190 gcc_assert (!new_bb);
3191
3192 *ni_name_ptr = ni_name;
3193 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3194 *ratio_name_ptr = ratio_name;
3195
3196 return;
3197 }
3198
3199
3200 /* Function update_vuses_to_preheader.
3201
3202 Input:
3203 STMT - a statement with potential VUSEs.
3204 LOOP - the loop whose preheader will contain STMT.
3205
3206 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
3207 appears to be defined in a V_MAY_DEF in another statement in a loop.
3208 One such case is when the VUSE is at the dereference of a __restricted__
3209 pointer in a load and the V_MAY_DEF is at the dereference of a different
3210 __restricted__ pointer in a store. Vectorization may result in
3211 copy_virtual_uses being called to copy the problematic VUSE to a new
3212 statement that is being inserted in the loop preheader. This procedure
3213 is called to change the SSA_NAME in the new statement's VUSE from the
3214 SSA_NAME updated in the loop to the related SSA_NAME available on the
3215 path entering the loop.
3216
3217 When this function is called, we have the following situation:
3218
3219 # vuse <name1>
3220 S1: vload
3221 do {
3222 # name1 = phi < name0 , name2>
3223
3224 # vuse <name1>
3225 S2: vload
3226
3227 # name2 = vdef <name1>
3228 S3: vstore
3229
3230 }while...
3231
3232 Stmt S1 was created in the loop preheader block as part of misaligned-load
3233 handling. This function fixes the name of the vuse of S1 from 'name1' to
3234 'name0'. */
3235
3236 static void
3237 update_vuses_to_preheader (tree stmt, struct loop *loop)
3238 {
3239 basic_block header_bb = loop->header;
3240 edge preheader_e = loop_preheader_edge (loop);
3241 ssa_op_iter iter;
3242 use_operand_p use_p;
3243
3244 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
3245 {
3246 tree ssa_name = USE_FROM_PTR (use_p);
3247 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
3248 tree name_var = SSA_NAME_VAR (ssa_name);
3249 basic_block bb = bb_for_stmt (def_stmt);
3250
3251 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
3252 if (!IS_EMPTY_STMT (def_stmt)
3253 && flow_bb_inside_loop_p (loop, bb))
3254 {
3255 /* If the block containing the statement defining the SSA_NAME
3256 is in the loop then it's necessary to find the definition
3257 outside the loop using the PHI nodes of the header. */
3258 tree phi;
3259 bool updated = false;
3260
3261 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
3262 {
3263 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
3264 {
3265 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
3266 updated = true;
3267 break;
3268 }
3269 }
3270 gcc_assert (updated);
3271 }
3272 }
3273 }
3274
3275
3276 /* Function vect_update_ivs_after_vectorizer.
3277
3278 "Advance" the induction variables of LOOP to the value they should take
3279 after the execution of LOOP. This is currently necessary because the
3280 vectorizer does not handle induction variables that are used after the
3281 loop. Such a situation occurs when the last iterations of LOOP are
3282 peeled, because:
3283 1. We introduced new uses after LOOP for IVs that were not originally used
3284 after LOOP: the IVs of LOOP are now used by an epilog loop.
3285 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3286 times, whereas the loop IVs should be bumped N times.
3287
3288 Input:
3289 - LOOP - a loop that is going to be vectorized. The last few iterations
3290 of LOOP were peeled.
3291 - NITERS - the number of iterations that LOOP executes (before it is
3292 vectorized). i.e, the number of times the ivs should be bumped.
3293 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3294 coming out from LOOP on which there are uses of the LOOP ivs
3295 (this is the path from LOOP->exit to epilog_loop->preheader).
3296
3297 The new definitions of the ivs are placed in LOOP->exit.
3298 The phi args associated with the edge UPDATE_E in the bb
3299 UPDATE_E->dest are updated accordingly.
3300
3301 Assumption 1: Like the rest of the vectorizer, this function assumes
3302 a single loop exit that has a single predecessor.
3303
3304 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3305 organized in the same order.
3306
3307 Assumption 3: The access function of the ivs is simple enough (see
3308 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3309
3310 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3311 coming out of LOOP on which the ivs of LOOP are used (this is the path
3312 that leads to the epilog loop; other paths skip the epilog loop). This
3313 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3314 needs to have its phis updated.
3315 */
3316
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the loop-header phis and the update_bb phis in lock-step;
     per Assumption 2 above they are organized in the same order.  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Get the IV's access function and split out its per-iteration
         evolution (step) in this loop.  */
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
        unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
                                                               loop->num));

      /* ni = init + niters * step: the IV's value after NITERS
         iterations.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
                   build2 (MULT_EXPR, TREE_TYPE (niters),
                           niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb: the phi arg on UPDATE_E
         now receives the advanced IV value.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
3396
3397
3398 /* Function vect_do_peeling_for_loop_bound
3399
3400 Peel the last iterations of the loop represented by LOOP_VINFO.
3401 The peeled iterations form a new epilog loop. Given that the loop now
3402 iterates NITERS times, the new epilog loop iterates
3403 NITERS % VECTORIZATION_FACTOR times.
3404
3405 The original loop will later be made to iterate
3406 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3407
static void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
                                struct loops *loops)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  /* Peeling duplicates blocks; track original/copy correspondence.  */
  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
                                   &ratio_mult_vf_name, ratio);

  /* Peel the last NI - RATIO_MULT_VF iterations into an epilog loop
     placed after LOOP's exit.  Peeling must not renumber LOOP.  */
  loop_num  = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
                                            ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
3462
3463
3464 /* Function vect_gen_niters_for_prolog_loop
3465
3466 Set the number of iterations for the loop represented by LOOP_VINFO
3467 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3468 and the misalignment of DR - the data reference recorded in
3469 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3470 this loop, the data reference DR will refer to an aligned location.
3471
3472 The following computation is generated:
3473
3474 If the misalignment of DR is known at compile time:
3475 addr_mis = int mis = DR_MISALIGNMENT (dr);
3476 Else, compute address misalignment in bytes:
3477 addr_mis = addr & (vectype_size - 1)
3478
3479 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3480
3481 (elem_size = element type size; an element is the scalar element
3482 whose type is the inner type of the vectype) */
3483
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Misalignment is known at compile time: the prolog iteration
         count is the constant (VF - elem_misalign) & (VF - 1).  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
    }
  else
    {
      /* Misalignment is unknown: emit code computing it from the
         runtime address of the data reference.  */
      tree new_stmts = NULL_TREE;
      tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      /* Pointer-sized integer type for the bit-twiddling below.  */
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      /* NOTE(review): the computations below use the vectype's alignment
         (vectype_align) where the comments speak of the vector size, and
         vectype_align/vf as the element size — this assumes vector size
         equals vector alignment; confirm for targets where they differ.  */
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
3571
3572
3573 /* Function vect_update_init_of_dr
3574
3575 NITERS iterations were peeled from LOOP. DR represents a data reference
3576 in LOOP. This function updates the information recorded in DR to
3577 account for the fact that the first NITERS iterations had already been
3578 executed. Specifically, it updates the OFFSET field of DR. */
3579
3580 static void
3581 vect_update_init_of_dr (struct data_reference *dr, tree niters)
3582 {
3583 tree offset = DR_OFFSET (dr);
3584
3585 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
3586 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
3587 DR_OFFSET (dr) = offset;
3588 }
3589
3590
3591 /* Function vect_update_inits_of_drs
3592
3593 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3594 This function updates the information recorded for the data references in
3595 the loop to account for the fact that the first NITERS iterations had
3596 already been executed. Specifically, it updates the initial_condition of the
3597 access_function of all the data_references in the loop. */
3598
3599 static void
3600 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3601 {
3602 unsigned int i;
3603 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3604 struct data_reference *dr;
3605
3606 if (vect_dump && (dump_flags & TDF_DETAILS))
3607 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
3608
3609 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3610 vect_update_init_of_dr (dr, niters);
3611 }
3612
3613
3614 /* Function vect_do_peeling_for_alignment
3615
3616 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3617 'niters' is set to the misalignment of one of the data references in the
3618 loop, thereby forcing it to refer to an aligned location at the beginning
3619 of the execution of this loop. The data reference for which we are
3620 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3621
static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  /* Peeling duplicates blocks; track original/copy correspondence.  */
  initialize_original_copy_tables ();

  /* Compute how many initial iterations must be peeled so that the
     unaligned data reference becomes aligned.  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
                                   niters_of_prolog_loop, ni_name, true);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes.  The peeled iterations are
     subtracted from the main loop's iteration count.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
3660
3661
3662 /* Function vect_create_cond_for_align_checks.
3663
3664 Create a conditional expression that represents the alignment checks for
3665 all of data references (array element references) whose alignment must be
3666 checked at runtime.
3667
3668 Input:
3669 LOOP_VINFO - two fields of the loop information are used.
3670 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
3671 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
3672
3673 Output:
3674 COND_EXPR_STMT_LIST - statements needed to construct the conditional
3675 expression.
3676 The returned value is the conditional expression to be used in the if
3677 statement that controls which version of the loop gets executed at runtime.
3678
3679 The algorithm makes two assumptions:
3680 1) The number of bytes "n" in a vector is a power of 2.
3681 2) An address "a" is aligned if a%n is zero and that this
3682 test can be done as a&(n-1) == 0. For example, for 16
3683 byte vectors the test is a&0xf == 0. */
3684
3685 static tree
3686 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
3687 tree *cond_expr_stmt_list)
3688 {
3689 VEC(tree,heap) *may_misalign_stmts
3690 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3691 tree ref_stmt;
3692 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
3693 tree mask_cst;
3694 unsigned int i;
3695 tree psize;
3696 tree int_ptrsize_type;
3697 char tmp_name[20];
3698 tree or_tmp_name = NULL_TREE;
3699 tree and_tmp, and_tmp_name, and_stmt;
3700 tree ptrsize_zero;
3701
3702 /* Check that mask is one less than a power of 2, i.e., mask is
3703 all zeros followed by all ones. */
3704 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
3705
3706 /* CHECKME: what is the best integer or unsigned type to use to hold a
3707 cast from a pointer value? */
3708 psize = TYPE_SIZE (ptr_type_node);
3709 int_ptrsize_type
3710 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
3711
3712 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
3713 of the first vector of the i'th data reference. */
3714
3715 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
3716 {
3717 tree new_stmt_list = NULL_TREE;
3718 tree addr_base;
3719 tree addr_tmp, addr_tmp_name, addr_stmt;
3720 tree or_tmp, new_or_tmp_name, or_stmt;
3721
3722 /* create: addr_tmp = (int)(address_of_first_vector) */
3723 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
3724 &new_stmt_list,
3725 NULL_TREE);
3726
3727 if (new_stmt_list != NULL_TREE)
3728 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
3729
3730 sprintf (tmp_name, "%s%d", "addr2int", i);
3731 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
3732 add_referenced_var (addr_tmp);
3733 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
3734 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
3735 addr_stmt = build2 (MODIFY_EXPR, void_type_node,
3736 addr_tmp_name, addr_stmt);
3737 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
3738 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
3739
3740 /* The addresses are OR together. */
3741
3742 if (or_tmp_name != NULL_TREE)
3743 {
3744 /* create: or_tmp = or_tmp | addr_tmp */
3745 sprintf (tmp_name, "%s%d", "orptrs", i);
3746 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
3747 add_referenced_var (or_tmp);
3748 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
3749 or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
3750 build2 (BIT_IOR_EXPR, int_ptrsize_type,
3751 or_tmp_name,
3752 addr_tmp_name));
3753 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
3754 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
3755 or_tmp_name = new_or_tmp_name;
3756 }
3757 else
3758 or_tmp_name = addr_tmp_name;
3759
3760 } /* end for i */
3761
3762 mask_cst = build_int_cst (int_ptrsize_type, mask);
3763
3764 /* create: and_tmp = or_tmp & mask */
3765 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
3766 add_referenced_var (and_tmp);
3767 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
3768
3769 and_stmt = build2 (MODIFY_EXPR, void_type_node,
3770 and_tmp_name,
3771 build2 (BIT_AND_EXPR, int_ptrsize_type,
3772 or_tmp_name, mask_cst));
3773 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
3774 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
3775
3776 /* Make and_tmp the left operand of the conditional test against zero.
3777 if and_tmp has a nonzero bit then some address is unaligned. */
3778 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
3779 return build2 (EQ_EXPR, boolean_type_node,
3780 and_tmp_name, ptrsize_zero);
3781 }
3782
3783
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.

   Steps, in order:
     1. If some data refs may be misaligned, version the loop and guard
        the vectorized copy with a runtime alignment check.
     2. Optionally peel a prolog loop to align a data ref.
     3. Optionally peel an epilog loop to handle a remainder of
        iterations not divisible by the vectorization factor.
     4. Replace each relevant scalar stmt in the loop body with its
        vector counterpart and remove vectorized stores.
     5. Make the loop iterate NITERS / VF times.  */

void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bitmap_iterator bi;
  unsigned int j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned.  */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      /* Build the runtime alignment test; the stmts that compute it are
	 collected in COND_EXPR_STMT_LIST and inserted below, once the
	 condition block exists.  */
      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
	 vectorization - that the loop exit block has a single predecessor.
	 After versioning, the exit block of both loop versions is the same
	 basic block (i.e. it has two predecessors). Just in order to simplify
	 following transformations in the vectorizer, we fix this situation
	 here by adding a new (empty) block on the exit-edge of the loop,
	 with the proper loop-exit phis to maintain loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      e = EDGE_SUCC (new_exit_bb, 0);

      /* For each phi in the old merge block, create a corresponding phi in
	 the new exit block so the loop stays in loop-closed SSA form, and
	 redirect the merge-block phi argument to the new phi's result.  */
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
	   orig_phi = PHI_CHAIN (orig_phi))
	{
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
	  SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
	}

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      /* Now insert the stmts that compute the alignment condition at the
	 end of the block that ends with the versioning test.  */
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    /* NITERS divides evenly by VF: the vector loop runs exactly
       NITERS / VF times and no epilog is needed.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* No bsi_next in the loop header: vect_transform_stmt advances SI
	 past the stmts it emits, and removed stores must not be skipped
	 over; SI is advanced explicitly in each branch below.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "------>vectorizing statement: ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Skip stmts that need no transformation (not relevant to
	     vectorization and not live outside the loop).  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }

	  if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
	       != (unsigned HOST_WIDE_INT) vectorization_factor)
	      && vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "multiple-types.");

	  /* -------- vectorize statement ------------ */
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* Free the attached stmt_vec_info and remove the stmt.
		 A vectorized store fully replaces the scalar one, so
		 the scalar stmt is deleted here rather than left for
		 dead-code elimination.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si, true);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* Mark for renaming the virtual names recorded during stmt
     transformation, so update_ssa below rewrites their uses.  */
  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}