1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41
42 /* Pattern recognition functions */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44 tree *);
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46 tree *);
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48 tree *);
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51 tree *);
52 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
53 vect_recog_widen_mult_pattern,
54 vect_recog_widen_sum_pattern,
55 vect_recog_dot_prod_pattern,
56 vect_recog_pow_pattern,
57 vect_recog_over_widening_pattern};
58
59
60 /* Function widened_name_p
61
62 Check whether NAME, an ssa-name used in USE_STMT,
63 is a result of a type-promotion, such that:
64 DEF_STMT: NAME = NOP (name0)
65 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
66 If CHECK_SIGN is TRUE, check that either both types are signed or both are
67 unsigned. */
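/* For example (a hypothetical snippet; the variable names are invented
   for illustration only):

     short s_1;
     i_2 = (int) s_1;        <-- DEF_STMT
     x_3 = i_2 + y_4;        <-- USE_STMT

   Calling widened_name_p (i_2, USE_STMT, &half_type, &def_stmt, true)
   would succeed here, setting *HALF_TYPE to 'short' and *DEF_STMT to
   the conversion statement.  */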
68
69 static bool
70 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
71 bool check_sign)
72 {
73 tree dummy;
74 gimple dummy_gimple;
75 loop_vec_info loop_vinfo;
76 stmt_vec_info stmt_vinfo;
77 tree type = TREE_TYPE (name);
78 tree oprnd0;
79 enum vect_def_type dt;
80 tree def;
81
82 stmt_vinfo = vinfo_for_stmt (use_stmt);
83 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
84
85 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
86 return false;
87
88 if (dt != vect_internal_def
89 && dt != vect_external_def && dt != vect_constant_def)
90 return false;
91
92 if (! *def_stmt)
93 return false;
94
95 if (!is_gimple_assign (*def_stmt))
96 return false;
97
98 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
99 return false;
100
101 oprnd0 = gimple_assign_rhs1 (*def_stmt);
102
103 *half_type = TREE_TYPE (oprnd0);
104 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
105 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
106 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
107 return false;
108
109 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
110 &dt))
111 return false;
112
113 return true;
114 }
115
116 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
117 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
118
119 static tree
120 vect_recog_temp_ssa_var (tree type, gimple stmt)
121 {
122 tree var = create_tmp_var (type, "patt");
123
124 add_referenced_var (var);
125 var = make_ssa_name (var, stmt);
126 return var;
127 }
128
129 /* Function vect_recog_dot_prod_pattern
130
131 Try to find the following pattern:
132
133 type x_t, y_t;
134 TYPE1 prod;
135 TYPE2 sum = init;
136 loop:
137 sum_0 = phi <init, sum_1>
138 S1 x_t = ...
139 S2 y_t = ...
140 S3 x_T = (TYPE1) x_t;
141 S4 y_T = (TYPE1) y_t;
142 S5 prod = x_T * y_T;
143 [S6 prod = (TYPE2) prod; #optional]
144 S7 sum_1 = prod + sum_0;
145
146 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
147      same size as 'TYPE1' or bigger. This is a special case of a reduction
148 computation.
149
150 Input:
151
152 * STMTS: Contains a stmt from which the pattern search begins. In the
153 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
154 will be detected.
155
156 Output:
157
158 * TYPE_IN: The type of the input arguments to the pattern.
159
160 * TYPE_OUT: The type of the output of this pattern.
161
162 * Return value: A new stmt that will be used to replace the sequence of
163 stmts that constitute the pattern. In this case it will be:
164 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
165
166 Note: The dot-prod idiom is a widening reduction pattern that is
167 vectorized without preserving all the intermediate results. It
168 produces only N/2 (widened) results (by summing up pairs of
169 intermediate results) rather than all N results. Therefore, we
170 cannot allow this pattern when we want to get all the results and in
171 the correct order (as is the case when this computation is in an
172      inner-loop nested in an outer-loop that is being vectorized). */
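/* An illustrative (hypothetical) source loop that matches this pattern,
   assuming narrow input arrays X and Y:

     short x[N], y[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += (int) x[i] * (int) y[i];

   Here 'type' is short and 'TYPE1' (= 'TYPE2') is int, so the whole
   reduction can be replaced by a DOT_PROD_EXPR.  */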
173
174 static gimple
175 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
176 tree *type_out)
177 {
178 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
179 tree oprnd0, oprnd1;
180 tree oprnd00, oprnd01;
181 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
182 tree type, half_type;
183 gimple pattern_stmt;
184 tree prod_type;
185 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
186 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
187 tree var;
188
189 if (!is_gimple_assign (last_stmt))
190 return NULL;
191
192 type = gimple_expr_type (last_stmt);
193
194 /* Look for the following pattern
195 DX = (TYPE1) X;
196 DY = (TYPE1) Y;
197 DPROD = DX * DY;
198 DDPROD = (TYPE2) DPROD;
199 sum_1 = DDPROD + sum_0;
200 In which
201 - DX is double the size of X
202 - DY is double the size of Y
203 - DX, DY, DPROD all have the same type
204         - sum is the same size as DPROD or bigger
205 - sum has been recognized as a reduction variable.
206
207 This is equivalent to:
208 DPROD = X w* Y; #widen mult
209 sum_1 = DPROD w+ sum_0; #widen summation
210 or
211 DPROD = X w* Y; #widen mult
212 sum_1 = DPROD + sum_0; #summation
213 */
214
215 /* Starting from LAST_STMT, follow the defs of its uses in search
216 of the above pattern. */
217
218 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
219 return NULL;
220
221 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
222 {
223 /* Has been detected as widening-summation? */
224
225 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
226 type = gimple_expr_type (stmt);
227 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
228 return NULL;
229 oprnd0 = gimple_assign_rhs1 (stmt);
230 oprnd1 = gimple_assign_rhs2 (stmt);
231 half_type = TREE_TYPE (oprnd0);
232 }
233 else
234 {
235 gimple def_stmt;
236
237 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
238 return NULL;
239 oprnd0 = gimple_assign_rhs1 (last_stmt);
240 oprnd1 = gimple_assign_rhs2 (last_stmt);
241 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
242 || !types_compatible_p (TREE_TYPE (oprnd1), type))
243 return NULL;
244 stmt = last_stmt;
245
246 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
247 {
248 stmt = def_stmt;
249 oprnd0 = gimple_assign_rhs1 (stmt);
250 }
251 else
252 half_type = type;
253 }
254
255 /* So far so good. Since last_stmt was detected as a (summation) reduction,
256 we know that oprnd1 is the reduction variable (defined by a loop-header
257 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
258      It remains to check that oprnd0 is defined by a (widen_)mult_expr. */
259 if (TREE_CODE (oprnd0) != SSA_NAME)
260 return NULL;
261
262 prod_type = half_type;
263 stmt = SSA_NAME_DEF_STMT (oprnd0);
264
265   /* It cannot be the dot_prod pattern if the stmt is outside the loop. */
266 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
267 return NULL;
268
269   /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
270      inside the loop (in case we are analyzing an outer-loop). */
271 if (!is_gimple_assign (stmt))
272 return NULL;
273 stmt_vinfo = vinfo_for_stmt (stmt);
274 gcc_assert (stmt_vinfo);
275 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
276 return NULL;
277 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
278 return NULL;
279 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
280 {
281 /* Has been detected as a widening multiplication? */
282
283 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
284 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
285 return NULL;
286 stmt_vinfo = vinfo_for_stmt (stmt);
287 gcc_assert (stmt_vinfo);
288 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
289 oprnd00 = gimple_assign_rhs1 (stmt);
290 oprnd01 = gimple_assign_rhs2 (stmt);
291 }
292 else
293 {
294 tree half_type0, half_type1;
295 gimple def_stmt;
296 tree oprnd0, oprnd1;
297
298 oprnd0 = gimple_assign_rhs1 (stmt);
299 oprnd1 = gimple_assign_rhs2 (stmt);
300 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
301 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
302 return NULL;
303 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
304 return NULL;
305 oprnd00 = gimple_assign_rhs1 (def_stmt);
306 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
307 return NULL;
308 oprnd01 = gimple_assign_rhs1 (def_stmt);
309 if (!types_compatible_p (half_type0, half_type1))
310 return NULL;
311 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
312 return NULL;
313 }
314
315 half_type = TREE_TYPE (oprnd00);
316 *type_in = half_type;
317 *type_out = type;
318
319 /* Pattern detected. Create a stmt to be used to replace the pattern: */
320 var = vect_recog_temp_ssa_var (type, NULL);
321 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
322 oprnd00, oprnd01, oprnd1);
323
324 if (vect_print_dump_info (REPORT_DETAILS))
325 {
326 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
327 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
328 }
329
330 /* We don't allow changing the order of the computation in the inner-loop
331 when doing outer-loop vectorization. */
332 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
333
334 return pattern_stmt;
335 }
336
337
338 /* Handle two cases of multiplication by a constant. The first one is when
339 the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
340    operand (OPRND). In that case, we can perform widen-mult from HALF_TYPE to
341 TYPE.
342
343 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
344 HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
345 TYPE), we can perform widen-mult from the intermediate type to TYPE and
346    replace a_T = (TYPE) a_t; with a_it = (interm_type) a_t; */
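/* A hypothetical example of the second case:

     char a_t;
     int prod_T = (int) a_t * 300;

   The constant 300 does not fit 'char', but 'int' is 4 times bigger than
   'char' and 300 fits the intermediate type 'short', so widen-mult from
   'short' to 'int' can be used after introducing a_it = (short) a_t.  */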
347
348 static bool
349 vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
350 VEC (gimple, heap) **stmts, tree type,
351 tree *half_type, gimple def_stmt)
352 {
353 tree new_type, new_oprnd, tmp;
354 gimple new_stmt;
355 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
356 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
357
358 if (int_fits_type_p (const_oprnd, *half_type))
359 {
360 /* CONST_OPRND is a constant of HALF_TYPE. */
361 *oprnd = gimple_assign_rhs1 (def_stmt);
362 return true;
363 }
364
365 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
366 || !gimple_bb (def_stmt)
367 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
368 || !vinfo_for_stmt (def_stmt))
369 return false;
370
371 /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
372 a type 2 times bigger than HALF_TYPE. */
373 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
374 TYPE_UNSIGNED (type));
375 if (!int_fits_type_p (const_oprnd, new_type))
376 return false;
377
378 /* Use NEW_TYPE for widen_mult. */
379 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
380 {
381 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
382 /* Check if the already created pattern stmt is what we need. */
383 if (!is_gimple_assign (new_stmt)
384 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
385 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
386 return false;
387
388 *oprnd = gimple_assign_lhs (new_stmt);
389 }
390 else
391 {
392       /* Create a_it = (NEW_TYPE) a_t; */
393 *oprnd = gimple_assign_rhs1 (def_stmt);
394 tmp = create_tmp_var (new_type, NULL);
395 add_referenced_var (tmp);
396 new_oprnd = make_ssa_name (tmp, NULL);
397 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
398 NULL_TREE);
399 SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
400 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
401 VEC_safe_push (gimple, heap, *stmts, def_stmt);
402 *oprnd = new_oprnd;
403 }
404
405 *half_type = new_type;
406 return true;
407 }
408
409
410 /* Function vect_recog_widen_mult_pattern
411
412 Try to find the following pattern:
413
414 type a_t, b_t;
415 TYPE a_T, b_T, prod_T;
416
417 S1 a_t = ;
418 S2 b_t = ;
419 S3 a_T = (TYPE) a_t;
420 S4 b_T = (TYPE) b_t;
421 S5 prod_T = a_T * b_T;
422
423 where type 'TYPE' is at least double the size of type 'type'.
424
425    Also detect unsigned cases:
426
427 unsigned type a_t, b_t;
428 unsigned TYPE u_prod_T;
429 TYPE a_T, b_T, prod_T;
430
431 S1 a_t = ;
432 S2 b_t = ;
433 S3 a_T = (TYPE) a_t;
434 S4 b_T = (TYPE) b_t;
435 S5 prod_T = a_T * b_T;
436 S6 u_prod_T = (unsigned TYPE) prod_T;
437
438 and multiplication by constants:
439
440 type a_t;
441 TYPE a_T, prod_T;
442
443 S1 a_t = ;
444 S3 a_T = (TYPE) a_t;
445 S5 prod_T = a_T * CONST;
446
447 A special case of multiplication by constants is when 'TYPE' is 4 times
448 bigger than 'type', but CONST fits an intermediate type 2 times smaller
449 than 'TYPE'. In that case we create an additional pattern stmt for S3
450 to create a variable of the intermediate type, and perform widen-mult
451 on the intermediate type as well:
452
453 type a_t;
454 interm_type a_it;
455 TYPE a_T, prod_T, prod_T';
456
457 S1 a_t = ;
458 S3 a_T = (TYPE) a_t;
459 '--> a_it = (interm_type) a_t;
460 S5 prod_T = a_T * CONST;
461 '--> prod_T' = a_it w* CONST;
462
463 Input/Output:
464
465 * STMTS: Contains a stmt from which the pattern search begins. In the
466 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
467 is detected. In case of unsigned widen-mult, the original stmt (S5) is
468 replaced with S6 in STMTS. In case of multiplication by a constant
469 of an intermediate type (the last case above), STMTS also contains S3
470 (inserted before S5).
471
472 Output:
473
474 * TYPE_IN: The type of the input arguments to the pattern.
475
476 * TYPE_OUT: The type of the output of this pattern.
477
478 * Return value: A new stmt that will be used to replace the sequence of
479 stmts that constitute the pattern. In this case it will be:
480 WIDEN_MULT <a_t, b_t>
481 */
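/* An illustrative (hypothetical) source loop for the basic case:

     short a[N], b[N];
     int c[N], i;
     for (i = 0; i < N; i++)
       c[i] = (int) a[i] * (int) b[i];

   where each multiplication can be replaced by
   WIDEN_MULT_EXPR <a[i], b[i]>, if the target supports it.  */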
482
483 static gimple
484 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
485 tree *type_in, tree *type_out)
486 {
487 gimple last_stmt = VEC_pop (gimple, *stmts);
488 gimple def_stmt0, def_stmt1;
489 tree oprnd0, oprnd1;
490 tree type, half_type0, half_type1;
491 gimple pattern_stmt;
492 tree vectype, vectype_out = NULL_TREE;
493 tree dummy;
494 tree var;
495 enum tree_code dummy_code;
496 int dummy_int;
497 VEC (tree, heap) *dummy_vec;
498 bool op0_ok, op1_ok;
499
500 if (!is_gimple_assign (last_stmt))
501 return NULL;
502
503 type = gimple_expr_type (last_stmt);
504
505 /* Starting from LAST_STMT, follow the defs of its uses in search
506 of the above pattern. */
507
508 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
509 return NULL;
510
511 oprnd0 = gimple_assign_rhs1 (last_stmt);
512 oprnd1 = gimple_assign_rhs2 (last_stmt);
513 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
514 || !types_compatible_p (TREE_TYPE (oprnd1), type))
515 return NULL;
516
517 /* Check argument 0. */
518 op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
519 /* Check argument 1. */
520 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
521
522   /* In the case of multiplication by a constant, one of the operands may fail
523      to match the pattern, but not both. */
524 if (!op0_ok && !op1_ok)
525 return NULL;
526
527 if (op0_ok && op1_ok)
528 {
529 oprnd0 = gimple_assign_rhs1 (def_stmt0);
530 oprnd1 = gimple_assign_rhs1 (def_stmt1);
531 }
532 else if (!op0_ok)
533 {
534 if (TREE_CODE (oprnd0) == INTEGER_CST
535 && TREE_CODE (half_type1) == INTEGER_TYPE
536 && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
537 stmts, type,
538 &half_type1, def_stmt1))
539 half_type0 = half_type1;
540 else
541 return NULL;
542 }
543 else if (!op1_ok)
544 {
545 if (TREE_CODE (oprnd1) == INTEGER_CST
546 && TREE_CODE (half_type0) == INTEGER_TYPE
547 && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
548 stmts, type,
549 &half_type0, def_stmt0))
550 half_type1 = half_type0;
551 else
552 return NULL;
553 }
554
555 /* Handle unsigned case. Look for
556 S6 u_prod_T = (unsigned TYPE) prod_T;
557 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
558 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
559 {
560 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
561 imm_use_iterator imm_iter;
562 use_operand_p use_p;
563 int nuses = 0;
564 gimple use_stmt = NULL;
565 tree use_type;
566
567 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
568 return NULL;
569
570 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
571 {
572 if (is_gimple_debug (USE_STMT (use_p)))
573 continue;
574 use_stmt = USE_STMT (use_p);
575 nuses++;
576 }
577
578 if (nuses != 1 || !is_gimple_assign (use_stmt)
579 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
580 return NULL;
581
582 use_lhs = gimple_assign_lhs (use_stmt);
583 use_type = TREE_TYPE (use_lhs);
584 if (!INTEGRAL_TYPE_P (use_type)
585 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
586 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
587 return NULL;
588
589 type = use_type;
590 last_stmt = use_stmt;
591 }
592
593 if (!types_compatible_p (half_type0, half_type1))
594 return NULL;
595
596 /* Pattern detected. */
597 if (vect_print_dump_info (REPORT_DETAILS))
598 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
599
600 /* Check target support */
601 vectype = get_vectype_for_scalar_type (half_type0);
602 vectype_out = get_vectype_for_scalar_type (type);
603 if (!vectype
604 || !vectype_out
605 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
606 vectype_out, vectype,
607 &dummy, &dummy, &dummy_code,
608 &dummy_code, &dummy_int, &dummy_vec))
609 return NULL;
610
611 *type_in = vectype;
612 *type_out = vectype_out;
613
614 /* Pattern supported. Create a stmt to be used to replace the pattern: */
615 var = vect_recog_temp_ssa_var (type, NULL);
616 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
617 oprnd1);
618 SSA_NAME_DEF_STMT (var) = pattern_stmt;
619
620 if (vect_print_dump_info (REPORT_DETAILS))
621 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
622
623 VEC_safe_push (gimple, heap, *stmts, last_stmt);
624 return pattern_stmt;
625 }
626
627
628 /* Function vect_recog_pow_pattern
629
630 Try to find the following pattern:
631
632 x = POW (y, N);
633
634 with POW being one of pow, powf, powi, powif and N being
635 either 2 or 0.5.
636
637 Input:
638
639 * LAST_STMT: A stmt from which the pattern search begins.
640
641 Output:
642
643 * TYPE_IN: The type of the input arguments to the pattern.
644
645 * TYPE_OUT: The type of the output of this pattern.
646
647 * Return value: A new stmt that will be used to replace the sequence of
648 stmts that constitute the pattern. In this case it will be:
649 x = x * x
650 or
651 x = sqrt (x)
652 */
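/* Illustrative (hypothetical) replacements performed by this function:

     y = pow (x, 2.0);      becomes    y = x * x;
     y = pow (x, 0.5);      becomes    y = sqrt (x);

   assuming the target can vectorize the multiplication or the sqrt
   call.  */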
653
654 static gimple
655 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
656 tree *type_out)
657 {
658 gimple last_stmt = VEC_index (gimple, *stmts, 0);
659 tree fn, base, exp = NULL;
660 gimple stmt;
661 tree var;
662
663 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
664 return NULL;
665
666 fn = gimple_call_fndecl (last_stmt);
667 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
668 return NULL;
669
670 switch (DECL_FUNCTION_CODE (fn))
671 {
672 case BUILT_IN_POWIF:
673 case BUILT_IN_POWI:
674 case BUILT_IN_POWF:
675 case BUILT_IN_POW:
676 base = gimple_call_arg (last_stmt, 0);
677 exp = gimple_call_arg (last_stmt, 1);
678 if (TREE_CODE (exp) != REAL_CST
679 && TREE_CODE (exp) != INTEGER_CST)
680 return NULL;
681 break;
682
683 default:
684 return NULL;
685 }
686
687 /* We now have a pow or powi builtin function call with a constant
688 exponent. */
689
690 *type_out = NULL_TREE;
691
692 /* Catch squaring. */
693 if ((host_integerp (exp, 0)
694 && tree_low_cst (exp, 0) == 2)
695 || (TREE_CODE (exp) == REAL_CST
696 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
697 {
698 *type_in = TREE_TYPE (base);
699
700 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
701 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
702 SSA_NAME_DEF_STMT (var) = stmt;
703 return stmt;
704 }
705
706 /* Catch square root. */
707 if (TREE_CODE (exp) == REAL_CST
708 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
709 {
710 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
711 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
712 if (*type_in)
713 {
714 gimple stmt = gimple_build_call (newfn, 1, base);
715 if (vectorizable_function (stmt, *type_in, *type_in)
716 != NULL_TREE)
717 {
718 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
719 gimple_call_set_lhs (stmt, var);
720 return stmt;
721 }
722 }
723 }
724
725 return NULL;
726 }
727
728
729 /* Function vect_recog_widen_sum_pattern
730
731 Try to find the following pattern:
732
733 type x_t;
734 TYPE x_T, sum = init;
735 loop:
736 sum_0 = phi <init, sum_1>
737 S1 x_t = *p;
738 S2 x_T = (TYPE) x_t;
739 S3 sum_1 = x_T + sum_0;
740
741    where type 'TYPE' is at least double the size of type 'type', i.e., we're
742 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
743 a special case of a reduction computation.
744
745 Input:
746
747 * LAST_STMT: A stmt from which the pattern search begins. In the example,
748 when this function is called with S3, the pattern {S2,S3} will be detected.
749
750 Output:
751
752 * TYPE_IN: The type of the input arguments to the pattern.
753
754 * TYPE_OUT: The type of the output of this pattern.
755
756 * Return value: A new stmt that will be used to replace the sequence of
757 stmts that constitute the pattern. In this case it will be:
758 WIDEN_SUM <x_t, sum_0>
759
760 Note: The widening-sum idiom is a widening reduction pattern that is
761 vectorized without preserving all the intermediate results. It
762 produces only N/2 (widened) results (by summing up pairs of
763 intermediate results) rather than all N results. Therefore, we
764 cannot allow this pattern when we want to get all the results and in
765 the correct order (as is the case when this computation is in an
766      inner-loop nested in an outer-loop that is being vectorized). */
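/* An illustrative (hypothetical) source loop:

     unsigned char x[N];
     unsigned int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += (unsigned int) x[i];

   Here 'type' is unsigned char and 'TYPE' is unsigned int, so the
   reduction can be replaced by WIDEN_SUM <x[i], sum>.  */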
767
768 static gimple
769 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
770 tree *type_out)
771 {
772 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
773 tree oprnd0, oprnd1;
774 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
775 tree type, half_type;
776 gimple pattern_stmt;
777 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
778 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
779 tree var;
780
781 if (!is_gimple_assign (last_stmt))
782 return NULL;
783
784 type = gimple_expr_type (last_stmt);
785
786 /* Look for the following pattern
787 DX = (TYPE) X;
788 sum_1 = DX + sum_0;
789 In which DX is at least double the size of X, and sum_1 has been
790 recognized as a reduction variable.
791 */
792
793 /* Starting from LAST_STMT, follow the defs of its uses in search
794 of the above pattern. */
795
796 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
797 return NULL;
798
799 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
800 return NULL;
801
802 oprnd0 = gimple_assign_rhs1 (last_stmt);
803 oprnd1 = gimple_assign_rhs2 (last_stmt);
804 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
805 || !types_compatible_p (TREE_TYPE (oprnd1), type))
806 return NULL;
807
808 /* So far so good. Since last_stmt was detected as a (summation) reduction,
809 we know that oprnd1 is the reduction variable (defined by a loop-header
810 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
811      It remains to check that oprnd0 is defined by a cast from type 'type'
812      to type 'TYPE'. */
813
814 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
815 return NULL;
816
817 oprnd0 = gimple_assign_rhs1 (stmt);
818 *type_in = half_type;
819 *type_out = type;
820
821 /* Pattern detected. Create a stmt to be used to replace the pattern: */
822 var = vect_recog_temp_ssa_var (type, NULL);
823 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
824 oprnd0, oprnd1);
825 SSA_NAME_DEF_STMT (var) = pattern_stmt;
826
827 if (vect_print_dump_info (REPORT_DETAILS))
828 {
829 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
830 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
831 }
832
833 /* We don't allow changing the order of the computation in the inner-loop
834 when doing outer-loop vectorization. */
835 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
836
837 return pattern_stmt;
838 }
839
840
841 /* Return TRUE if the operation in STMT can be performed on a smaller type.
842
843 Input:
844 STMT - a statement to check.
845 DEF - we support operations with two operands, one of which is constant.
846 The other operand can be defined by a demotion operation, or by a
847 previous statement in a sequence of over-promoted operations. In the
848         latter case DEF is used to replace that operand. (It is defined by a
849 pattern statement we created for the previous statement in the
850 sequence).
851
852 Input/output:
853 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
854 NULL, it's the type of DEF.
855 STMTS - additional pattern statements. If a pattern statement (type
856 conversion) is created in this function, its original statement is
857 added to STMTS.
858
859 Output:
860 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
861 operands to use in the new pattern statement for STMT (will be created
862 in vect_recog_over_widening_pattern ()).
863 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
864 statements for STMT: the first one is a type promotion and the second
865 one is the operation itself. We return the type promotion statement
866 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
867 the second pattern statement. */
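/* An illustrative (hypothetical) case: in

     unsigned char x;
     unsigned int res = ((unsigned int) x << 3) | 0xfff;

   the shift amount (3) does not exceed the 8-bit precision of
   'unsigned char', and 0xfff fits a 16-bit type, so both the shift and
   the OR can be performed on the intermediate type 'unsigned short'
   instead of 'unsigned int' (subject to target support for the
   shift).  */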
868
869 static bool
870 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
871 tree *op0, tree *op1, gimple *new_def_stmt,
872 VEC (gimple, heap) **stmts)
873 {
874 enum tree_code code;
875 tree const_oprnd, oprnd;
876 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
877 gimple def_stmt, new_stmt;
878 bool first = false;
879 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
880 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
881
882 *new_def_stmt = NULL;
883
884 if (!is_gimple_assign (stmt))
885 return false;
886
887 code = gimple_assign_rhs_code (stmt);
888 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
889 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
890 return false;
891
892 oprnd = gimple_assign_rhs1 (stmt);
893 const_oprnd = gimple_assign_rhs2 (stmt);
894 type = gimple_expr_type (stmt);
895
896 if (TREE_CODE (oprnd) != SSA_NAME
897 || TREE_CODE (const_oprnd) != INTEGER_CST)
898 return false;
899
900 /* If we are in the middle of a sequence, we use DEF from a previous
901 statement. Otherwise, OPRND has to be a result of type promotion. */
902 if (*new_type)
903 {
904 half_type = *new_type;
905 oprnd = def;
906 }
907 else
908 {
909 first = true;
910 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
911 || !gimple_bb (def_stmt)
912 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
913 || !vinfo_for_stmt (def_stmt))
914 return false;
915 }
916
917 /* Can we perform the operation on a smaller type? */
918 switch (code)
919 {
920 case BIT_IOR_EXPR:
921 case BIT_XOR_EXPR:
922 case BIT_AND_EXPR:
923 if (!int_fits_type_p (const_oprnd, half_type))
924 {
925 /* HALF_TYPE is not enough. Try a bigger type if possible. */
926 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
927 return false;
928
929 interm_type = build_nonstandard_integer_type (
930 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
931 if (!int_fits_type_p (const_oprnd, interm_type))
932 return false;
933 }
934
935 break;
936
937 case LSHIFT_EXPR:
938 /* Try intermediate type - HALF_TYPE is not enough for sure. */
939 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
940 return false;
941
942 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
943 (e.g., if the original value was char, the shift amount is at most 8
944 if we want to use short). */
945 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
946 return false;
947
948 interm_type = build_nonstandard_integer_type (
949 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
950
951 if (!vect_supportable_shift (code, interm_type))
952 return false;
953
954 break;
955
956 case RSHIFT_EXPR:
957 if (vect_supportable_shift (code, half_type))
958 break;
959
960 /* Try intermediate type - HALF_TYPE is not supported. */
961 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
962 return false;
963
964 interm_type = build_nonstandard_integer_type (
965 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
966
967 if (!vect_supportable_shift (code, interm_type))
968 return false;
969
970 break;
971
972 default:
973 gcc_unreachable ();
974 }
975
976 /* There are four possible cases:
977 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
978 the first statement in the sequence)
979 a. The original, HALF_TYPE, is not enough - we replace the promotion
980 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
981 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
982 promotion.
983 2. OPRND is defined by a pattern statement we created.
984 a. Its type is not sufficient for the operation, we create a new stmt:
985 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
986 this statement in NEW_DEF_STMT, and it is later put in
987 STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
988 b. OPRND is good to use in the new statement. */
989 if (first)
990 {
991 if (interm_type)
992 {
993 /* Replace the original type conversion HALF_TYPE->TYPE with
994 HALF_TYPE->INTERM_TYPE. */
995 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
996 {
997 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
998 /* Check if the already created pattern stmt is what we need. */
999 if (!is_gimple_assign (new_stmt)
1000 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1001 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1002 return false;
1003
1004 oprnd = gimple_assign_lhs (new_stmt);
1005 }
1006 else
1007 {
1008 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1009 oprnd = gimple_assign_rhs1 (def_stmt);
1010 tmp = create_tmp_reg (interm_type, NULL);
1011 add_referenced_var (tmp);
1012 new_oprnd = make_ssa_name (tmp, NULL);
1013 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1014 oprnd, NULL_TREE);
1015 SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
1016 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1017 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1018 oprnd = new_oprnd;
1019 }
1020 }
1021 else
1022 {
1023 /* Retrieve the operand before the type promotion. */
1024 oprnd = gimple_assign_rhs1 (def_stmt);
1025 }
1026 }
1027 else
1028 {
1029 if (interm_type)
1030 {
1031 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1032 tmp = create_tmp_reg (interm_type, NULL);
1033 add_referenced_var (tmp);
1034 new_oprnd = make_ssa_name (tmp, NULL);
1035 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1036 oprnd, NULL_TREE);
1037 SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
1038 oprnd = new_oprnd;
1039 *new_def_stmt = new_stmt;
1040 }
1041
1042 /* Otherwise, OPRND is already set. */
1043 }
1044
1045 if (interm_type)
1046 *new_type = interm_type;
1047 else
1048 *new_type = half_type;
1049
1050 *op0 = oprnd;
1051 *op1 = fold_convert (*new_type, const_oprnd);
1052
1053 return true;
1054 }
1055
1056
1057 /* Try to find a statement or a sequence of statements that can be performed
1058 on a smaller type:
1059
1060 type x_t;
1061 TYPE x_T, res0_T, res1_T;
1062 loop:
1063 S1 x_t = *p;
1064 S2 x_T = (TYPE) x_t;
1065 S3 res0_T = op (x_T, C0);
1066 S4 res1_T = op (res0_T, C1);
1067 S5 ... = () res1_T; - type demotion
1068
1069 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1070 constants.
1071    Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1072    be 'type' or some intermediate type. For now, we expect S5 to be a type
1073    demotion operation. We also check that S3 and S4 have only one use. */
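/* An illustrative (hypothetical) source loop:

     unsigned char a[N];
     short b[N];
     int i;
     for (i = 0; i < N; i++)
       b[i] = (a[i] << 2) | 3;

   C promotes a[i] to 'int', so the shift and the OR are performed in
   'int' and the result is then demoted to 'short' by the store.  This
   pattern detects that both operations can be done on 'short'
   directly.  */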
1077 static gimple
1078 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1079 tree *type_in, tree *type_out)
1080 {
1081 gimple stmt = VEC_pop (gimple, *stmts);
1082 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1083 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1084 imm_use_iterator imm_iter;
1085 use_operand_p use_p;
1086 int nuses = 0;
1087 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1088 bool first;
1089 struct loop *loop = (gimple_bb (stmt))->loop_father;
1090
1091 first = true;
1092 while (1)
1093 {
1094 if (!vinfo_for_stmt (stmt)
1095 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1096 return NULL;
1097
1098 new_def_stmt = NULL;
1099 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1100 &op0, &op1, &new_def_stmt,
1101 stmts))
1102 {
1103 if (first)
1104 return NULL;
1105 else
1106 break;
1107 }
1108
1109 /* STMT can be performed on a smaller type. Check its uses. */
1110 lhs = gimple_assign_lhs (stmt);
1111 nuses = 0;
1112 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1113 {
1114 if (is_gimple_debug (USE_STMT (use_p)))
1115 continue;
1116 use_stmt = USE_STMT (use_p);
1117 nuses++;
1118 }
1119
1120 if (nuses != 1 || !is_gimple_assign (use_stmt)
1121 || !gimple_bb (use_stmt)
1122 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1123 return NULL;
1124
1125 /* Create pattern statement for STMT. */
1126 vectype = get_vectype_for_scalar_type (new_type);
1127 if (!vectype)
1128 return NULL;
1129
1130       /* We want to collect all the statements for which we create pattern
1131          statements, except for the case when the last statement in the
1132          sequence doesn't have a corresponding pattern statement. In such
1133          a case we associate the last pattern statement with the last statement
1134          in the sequence. Therefore, we only add an original statement to
1135          the list if we know that it is not the last. */
1136 if (prev_stmt)
1137 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1138
1139 var = vect_recog_temp_ssa_var (new_type, NULL);
1140 pattern_stmt = gimple_build_assign_with_ops (
1141 gimple_assign_rhs_code (stmt), var, op0, op1);
1142 SSA_NAME_DEF_STMT (var) = pattern_stmt;
1143 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1144 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
1145
1146 if (vect_print_dump_info (REPORT_DETAILS))
1147 {
1148 fprintf (vect_dump, "created pattern stmt: ");
1149 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1150 }
1151
1152 prev_stmt = stmt;
1153 stmt = use_stmt;
1154
1155 first = false;
1156 }
1157
1158 /* We got a sequence. We expect it to end with a type demotion operation.
1159 Otherwise, we quit (for now). There are three possible cases: the
1160 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1161 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1162 NEW_TYPE differs (we create a new conversion statement). */
1163 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1164 {
1165 use_lhs = gimple_assign_lhs (use_stmt);
1166 use_type = TREE_TYPE (use_lhs);
1167       /* Support only type promotion or signedness change. */
1168 if (!INTEGRAL_TYPE_P (use_type)
1169 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1170 return NULL;
1171
1172 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1173 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1174 {
1175 /* Create NEW_TYPE->USE_TYPE conversion. */
1176 tmp = create_tmp_reg (use_type, NULL);
1177 add_referenced_var (tmp);
1178 new_oprnd = make_ssa_name (tmp, NULL);
1179 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1180 var, NULL_TREE);
1181 SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt;
1182 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1183
1184 *type_in = get_vectype_for_scalar_type (new_type);
1185 *type_out = get_vectype_for_scalar_type (use_type);
1186
1187 /* We created a pattern statement for the last statement in the
1188 sequence, so we don't need to associate it with the pattern
1189 statement created for PREV_STMT. Therefore, we add PREV_STMT
1190 to the list in order to mark it later in vect_pattern_recog_1. */
1191 if (prev_stmt)
1192 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1193 }
1194 else
1195 {
1196 if (prev_stmt)
1197 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
1198 = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
1199
1200 *type_in = vectype;
1201 *type_out = NULL_TREE;
1202 }
1203
1204 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1205 }
1206 else
1207 /* TODO: support general case, create a conversion to the correct type. */
1208 return NULL;
1209
1210 /* Pattern detected. */
1211 if (vect_print_dump_info (REPORT_DETAILS))
1212 {
1213 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1214 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1215 }
1216
1217 return pattern_stmt;
1218 }
1219
1220
1221 /* Mark statements that are involved in a pattern. */
1222
1223 static inline void
1224 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1225 tree pattern_vectype)
1226 {
1227 stmt_vec_info pattern_stmt_info, def_stmt_info;
1228 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1229 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
1230 gimple def_stmt;
1231
1232 set_vinfo_for_stmt (pattern_stmt,
1233 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1234 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1235 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1236
1237 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1238 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1239 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1240 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1241 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1242 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
1243 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1244 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
1245 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1246 {
1247 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1248 set_vinfo_for_stmt (def_stmt,
1249 new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
1250 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1251 def_stmt_info = vinfo_for_stmt (def_stmt);
1252 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1253 STMT_VINFO_DEF_TYPE (def_stmt_info)
1254 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1255 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1256 }
1257 }
1258
1259 /* Function vect_pattern_recog_1
1260
1261 Input:
1262 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
1263 computation pattern.
1264 STMT: A stmt from which the pattern search should start.
1265
1266 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
1267 expression that computes the same functionality and can be used to
1268 replace the sequence of stmts that are involved in the pattern.
1269
1270 Output:
1271 This function checks if the expression returned by PATTERN_RECOG_FUNC is
1272 supported in vector form by the target. We use 'TYPE_IN' to obtain the
1273 relevant vector type. If 'TYPE_IN' is already a vector type, then this
1274 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
1275 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
1276    the available target pattern.
1277
1278 This function also does some bookkeeping, as explained in the documentation
1279    for vect_pattern_recog. */
1280
1281 static void
1282 vect_pattern_recog_1 (
1283 gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
1284 gimple_stmt_iterator si)
1285 {
1286 gimple stmt = gsi_stmt (si), pattern_stmt;
1287 stmt_vec_info stmt_info;
1288 loop_vec_info loop_vinfo;
1289 tree pattern_vectype;
1290 tree type_in, type_out;
1291 enum tree_code code;
1292 int i;
1293 gimple next;
1294 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1295
1296 VEC_quick_push (gimple, stmts_to_replace, stmt);
1297 pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out);
1298 if (!pattern_stmt)
1299 return;
1300
1301 stmt = VEC_last (gimple, stmts_to_replace);
1302 stmt_info = vinfo_for_stmt (stmt);
1303 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1304
1305 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
1306 {
1307 /* No need to check target support (already checked by the pattern
1308 recognition function). */
1309 if (type_out)
1310 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
1311 pattern_vectype = type_out ? type_out : type_in;
1312 }
1313 else
1314 {
1315 enum machine_mode vec_mode;
1316 enum insn_code icode;
1317 optab optab;
1318
1319 /* Check target support */
1320 type_in = get_vectype_for_scalar_type (type_in);
1321 if (!type_in)
1322 return;
1323 if (type_out)
1324 type_out = get_vectype_for_scalar_type (type_out);
1325 else
1326 type_out = type_in;
1327 if (!type_out)
1328 return;
1329 pattern_vectype = type_out;
1330
1331 if (is_gimple_assign (pattern_stmt))
1332 code = gimple_assign_rhs_code (pattern_stmt);
1333 else
1334 {
1335 gcc_assert (is_gimple_call (pattern_stmt));
1336 code = CALL_EXPR;
1337 }
1338
1339 optab = optab_for_tree_code (code, type_in, optab_default);
1340 vec_mode = TYPE_MODE (type_in);
1341 if (!optab
1342 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
1343 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
1344 return;
1345 }
1346
1347 /* Found a vectorizable pattern. */
1348 if (vect_print_dump_info (REPORT_DETAILS))
1349 {
1350 fprintf (vect_dump, "pattern recognized: ");
1351 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1352 }
1353
1354 /* Mark the stmts that are involved in the pattern. */
1355 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
1356
1357 /* Patterns cannot be vectorized using SLP, because they change the order of
1358 computation. */
1359 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
1360 if (next == stmt)
1361 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
1362
1363 /* It is possible that additional pattern stmts are created and inserted in
1364 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1365 relevant statements. */
1366 for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
1367 && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
1368 i++)
1369 {
1370 stmt_info = vinfo_for_stmt (stmt);
1371 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1372 if (vect_print_dump_info (REPORT_DETAILS))
1373 {
1374 fprintf (vect_dump, "additional pattern stmt: ");
1375 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1376 }
1377
1378 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1379 }
1380
1381 VEC_free (gimple, heap, stmts_to_replace);
1382 }
1383
1384
1385 /* Function vect_pattern_recog
1386
1387 Input:
1388 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1389 computation idioms.
1390
1391 Output - for each computation idiom that is detected we create a new stmt
1392 that provides the same functionality and that can be vectorized. We
1393 also record some information in the struct_stmt_info of the relevant
1394 stmts, as explained below:
1395
1396 At the entry to this function we have the following stmts, with the
1397 following initial value in the STMT_VINFO fields:
1398
1399 stmt in_pattern_p related_stmt vec_stmt
1400 S1: a_i = .... - - -
1401 S2: a_2 = ..use(a_i).. - - -
1402 S3: a_1 = ..use(a_2).. - - -
1403 S4: a_0 = ..use(a_1).. - - -
1404 S5: ... = ..use(a_0).. - - -
1405
1406 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1407 represented by a single stmt. We then:
1408 - create a new stmt S6 equivalent to the pattern (the stmt is not
1409 inserted into the code)
1410 - fill in the STMT_VINFO fields as follows:
1411
1412 in_pattern_p related_stmt vec_stmt
1413 S1: a_i = .... - - -
1414 S2: a_2 = ..use(a_i).. - - -
1415 S3: a_1 = ..use(a_2).. - - -
1416 S4: a_0 = ..use(a_1).. true S6 -
1417 '---> S6: a_new = .... - S4 -
1418 S5: ... = ..use(a_0).. - - -
1419
1420 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1421 to each other through the RELATED_STMT field).
1422
1423 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1424 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1425 remain irrelevant unless used by stmts other than S4.
1426
1427 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1428 (because they are marked as irrelevant). It will vectorize S6, and record
1429 a pointer to the new vector stmt VS6 from S6 (as usual).
1430 S4 will be skipped, and S5 will be vectorized as usual:
1431
1432 in_pattern_p related_stmt vec_stmt
1433 S1: a_i = .... - - -
1434 S2: a_2 = ..use(a_i).. - - -
1435 S3: a_1 = ..use(a_2).. - - -
1436 > VS6: va_new = .... - - -
1437 S4: a_0 = ..use(a_1).. true S6 VS6
1438 '---> S6: a_new = .... - S4 VS6
1439 > VS5: ... = ..vuse(va_new).. - - -
1440 S5: ... = ..use(a_0).. - - -
1441
1442 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1443 elsewhere), and we'll end up with:
1444
1445 VS6: va_new = ....
1446 VS5: ... = ..vuse(va_new)..
1447
1448 In case of more than one pattern statements, e.g., widen-mult with
1449 intermediate type:
1450
1451 S1 a_t = ;
1452 S2 a_T = (TYPE) a_t;
1453 '--> S3: a_it = (interm_type) a_t;
1454 S4 prod_T = a_T * CONST;
1455 '--> S5: prod_T' = a_it w* CONST;
1456
1457 there may be other users of a_T outside the pattern. In that case S2 will
1458 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
1459 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
1460 be recorded in S3. */
1461
1462 void
1463 vect_pattern_recog (loop_vec_info loop_vinfo)
1464 {
1465 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1466 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1467 unsigned int nbbs = loop->num_nodes;
1468 gimple_stmt_iterator si;
1469 unsigned int i, j;
1470 gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1471
1472 if (vect_print_dump_info (REPORT_DETAILS))
1473 fprintf (vect_dump, "=== vect_pattern_recog ===");
1474
1475 /* Scan through the loop stmts, applying the pattern recognition
1476 functions starting at each stmt visited: */
1477 for (i = 0; i < nbbs; i++)
1478 {
1479 basic_block bb = bbs[i];
1480 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1481 {
1482 /* Scan over all generic vect_recog_xxx_pattern functions. */
1483 for (j = 0; j < NUM_PATTERNS; j++)
1484 {
1485 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
1486 vect_pattern_recog_1 (vect_recog_func_ptr, si);
1487 }
1488 }
1489 }
1490 }